diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index bf0d06b8837..82e84ab5c36 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref) if (window_func->fix_fields(thd, ref)) return true; + fix_length_and_dec(); + max_length= window_func->max_length; fixed= 1; @@ -180,3 +182,27 @@ void Item_window_func::advance_window() } window_func->add(); } + +bool Item_sum_percent_rank::add() +{ + row_number++; + if (test_if_group_changed(orderby_fields) > -1) + { + /* Row value changed. */ + cur_rank= row_number; + } + return false; +} + +void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? */ + for (ORDER *curr= window_spec->order_list.first; curr; curr=curr->next) + { + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); + orderby_fields.push_back(tmp); + } + clear(); +} + + diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 6277183ce58..5cb1e8aae32 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -70,6 +70,7 @@ public: class Item_sum_rank: public Item_sum_int { +protected: longlong row_number; // just ROW_NUMBER() longlong cur_rank; // current value @@ -108,7 +109,7 @@ public: { return "rank"; } - + void setup_window_func(THD *thd, Window_spec *window_spec); }; @@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int }; +/* TODO-cvicentiu + * Perhaps this is overengineering, but I would like to decouple the 2-pass + * algorithm from the specific action that must be performed during the + * first pass. The second pass can make use of the "add" function from the + * Item_sum_. + */ + +/* + This class represents a generic interface for window functions that need + to store additional information. Such window functions include percent_rank + and cume_dist. 
+*/ +class Window_context +{ + public: + virtual void add_field_to_context(Field* field) = 0; + virtual void reset() = 0; + virtual ~Window_context() {}; +}; + +/* + A generic interface that specifies the datatype that the context represents. +*/ +template +class Window_context_getter +{ + protected: + virtual T get_field_context(const Field* field) = 0; + virtual ~Window_context_getter() {}; +}; + +/* + A window function context representing the number of rows that are present + within a partition. Because the number of rows is not dependent on the + specific value within the current field, we ignore the parameter + in this case. +*/ +class Window_context_row_count : + public Window_context, Window_context_getter +{ + public: + Window_context_row_count() : num_rows_(0) {}; + + void add_field_to_context(Field* field __attribute__((unused))) + { + num_rows_++; + } + + void reset() + { + num_rows_= 0; + } + + ulonglong get_field_context(const Field* field __attribute__((unused))) + { + return num_rows_; + } + private: + ulonglong num_rows_; +}; + +class Window_context_row_and_group_count : + public Window_context, Window_context_getter > +{ + public: + Window_context_row_and_group_count(void * group_list) {} +}; + +/* + An abstract class representing an item that holds a context. +*/ +class Item_context +{ + public: + Item_context() : context_(NULL) {} + Window_context* get_window_context() { return context_; } + + virtual bool create_window_context() = 0; + virtual void delete_window_context() = 0; + + protected: + Window_context* context_; +}; + +/* + A base window function (aggregate) that also holds a context. + + NOTE: All two pass window functions need to implement + this interface. 
+*/ +class Item_sum_window_with_context : public Item_sum_num, + public Item_context +{ + public: + Item_sum_window_with_context(THD *thd) + : Item_sum_num(thd), Item_context() {} +}; /* @detail @@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int Computation of this function requires two passes: - First pass to find #rows in the partition + This is held within the row_count context. - Second pass to compute rank of current row and the value of the function */ - -class Item_sum_percent_rank: public Item_sum_num +class Item_sum_percent_rank: public Item_sum_window_with_context, + public Window_context_row_count { - longlong rank; - longlong partition_rows; - - void clear() {} - bool add() { return false; } - void update_field() {} - public: Item_sum_percent_rank(THD *thd) - : Item_sum_num(thd), rank(0), partition_rows(0) {} + : Item_sum_window_with_context(thd), cur_rank(1) {} - double val_real() { return 0; } + longlong val_int() + { + /* + Percent rank is a real value so calling the integer value should never + happen. It makes no sense as it gets truncated to either 0 or 1. + */ + DBUG_ASSERT(0); + return 0; + } + + double val_real() + { + /* + We can not get the real value without knowing the number of rows + in the partition. Don't divide by 0. + */ + if (!get_context_()) + { + // Calling this kind of function without a context makes no sense. + DBUG_ASSERT(0); + return 0; + } + + longlong partition_rows = get_context_()->get_field_context(result_field); + return partition_rows > 1 ? + static_cast(cur_rank - 1) / (partition_rows - 1) : 0; + } enum Sumfunctype sum_func () const { @@ -204,12 +322,61 @@ class Item_sum_percent_rank: public Item_sum_num { return "percent_rank"; } - + + bool create_window_context() + { + // TODO-cvicentiu: Currently this means we must make sure to delete + // the window context. We can potentially allocate this on the THD memroot. 
+ // At the same time, this is only necessary for a small portion of the + // query execution and it does not make sense to keep it for all of it. + context_ = new Window_context_row_count(); + if (context_ == NULL) + return true; + return false; + } + + void delete_window_context() + { + if (context_) + delete get_context_(); + context_ = NULL; + } + + void update_field() {} + + void clear() + { + cur_rank= 1; + row_number= 0; + } + bool add(); + enum Item_result result_type () const { return REAL_RESULT; } enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + void setup_window_func(THD *thd, Window_spec *window_spec); + + private: + longlong cur_rank; // Current rank of the current row. + longlong row_number; // Value if this were the ROW_NUMBER() function. + + List orderby_fields; + + /* Helper function so that we don't cast the context every time. */ + Window_context_row_count* get_context_() + { + return static_cast(context_); + } }; + + /* @detail "The relative rank of a row R is defined as NP/NR, where @@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num two passes. */ -class Item_sum_cume_dist: public Item_sum_num +class Item_sum_cume_dist: public Item_sum_percent_rank { - longlong count; - longlong partition_rows; - - void clear() {} - bool add() { return false; } - void update_field() {} - public: Item_sum_cume_dist(THD *thd) - : Item_sum_num(thd), count(0), partition_rows(0) {} + : Item_sum_percent_rank(thd) {} double val_real() { return 0; } @@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num { return "cume_dist"; } - - enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } - }; @@ -331,7 +488,7 @@ public: } longlong val_int() - { + { if (force_return_blank) return 0; return read_value_from_result_field? 
result_field->val_int() : @@ -361,15 +518,14 @@ public: List &fields, uint flags); void fix_length_and_dec() { - window_func->fix_length_and_dec(); + decimals = window_func->decimals; } const char* func_name() const { return "WF"; } bool fix_fields(THD *thd, Item **ref); - - bool resolve_window_name(THD *thd); + bool resolve_window_name(THD *thd); }; #endif /* ITEM_WINDOWFUNC_INCLUDED */ diff --git a/sql/sql_window.cc b/sql/sql_window.cc index c7dcfb73341..29fa938a2a6 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win, } +bool compute_two_pass_window_functions(Item_window_func *item_win, + TABLE *table, READ_RECORD *info) +{ + /* Perform first pass. */ + + // TODO-cvicentiu why not initialize the record for when we need, _in_ + // this function. + READ_RECORD *info2= new READ_RECORD(); + int err; + bool is_error = false; + bool first_row= true; + clone_read_record(info, info2); + Item_sum_window_with_context *window_func= + static_cast(item_win->window_func); + uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0)); + + is_error= window_func->create_window_context(); + /* Unable to allocate a new context. NOTE(review): this early return appears to skip the delete of info2 and the my_free of rowid_buf done at the end of the function. */ + if (is_error) + return true; + + Window_context *context = window_func->get_window_context(); + /* + The two pass algorithm is as follows: + We have a sorted table according to the partition and order by clauses. + 1. Scan through the table till we reach a partition boundary. + 2. For each row that we scan, add it to the context. + 3. Once the partition boundary is met, do a second scan through the + current partition and use the context information to compute the value for + the window function for that partition. + 4. Reset the context. + 5. Repeat from 1 till end of table. + */ + + bool done = false; + longlong rows_in_current_partition = 0; + // TODO handle end of table updating. 
+ while (!done) + { + + if ((err= info->read_record(info))) + { + done = true; + } + + bool partition_changed= (done || item_win->check_partition_bound() > -1) ? + true : false; + // On the very first row a partition change is always reported. Ignore it. + if (first_row) + { + partition_changed= false; + first_row= false; + } + + if (partition_changed) + { + /* + We are now looking at the first row for the next partition, or at the + end of the table. Either way, we must remember this position for when + we finish doing the second pass. + */ + table->file->position(table->record[0]); + memcpy(rowid_buf, table->file->ref, table->file->ref_length); + + for (longlong row_number = 0; row_number < rows_in_current_partition; + row_number++) + { + if ((err= info2->read_record(info2))) + { + is_error= true; + break; + } + window_func->add(); + // Save the window function value into the table. + item_win->save_in_field(item_win->result_field, true); + err= table->file->ha_update_row(table->record[1], table->record[0]); + if (err && err != HA_ERR_RECORD_IS_THE_SAME) + { + is_error= true; + break; + } + } + + if (is_error) + break; + + rows_in_current_partition= 0; + window_func->clear(); + context->reset(); + + // Return to the beginning of the new partition. NOTE(review): the ha_rnd_pos() return value is not checked. + table->file->ha_rnd_pos(table->record[0], rowid_buf); + } + rows_in_current_partition++; + context->add_field_to_context(item_win->result_field); + } + + window_func->delete_window_context(); + delete info2; + my_free(rowid_buf); + return is_error; +} + + /* @brief This function is called by JOIN::exec to compute window function values @@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List *curr_fields_list) is_error= true; break; } + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + { + if (compute_two_pass_window_functions(item_win, tbl, &info)) + is_error= true; + break; + } case Item_sum::COUNT_FUNC: { /*