diff --git a/mysql-test/r/win_lead_lag.result b/mysql-test/r/win_lead_lag.result new file mode 100644 index 00000000000..5ead58fa11a --- /dev/null +++ b/mysql-test/r/win_lead_lag.result @@ -0,0 +1,229 @@ +create table t1 ( +pk int primary key, +a int, +b int, +c char(10), +d decimal(10, 3), +e real +); +insert into t1 values +( 1, 0, 1, 'one', 0.1, 0.001), +( 2, 0, 2, 'two', 0.2, 0.002), +( 3, 0, 3, 'three', 0.3, 0.003), +( 4, 1, 2, 'three', 0.4, 0.004), +( 5, 1, 1, 'two', 0.5, 0.005), +( 6, 1, 1, 'one', 0.6, 0.006), +( 7, 2, NULL, 'n_one', 0.5, 0.007), +( 8, 2, 1, 'n_two', NULL, 0.008), +( 9, 2, 2, NULL, 0.7, 0.009), +(10, 2, 0, 'n_four', 0.8, 0.010), +(11, 2, 10, NULL, 0.9, NULL); +select pk, +lead(pk) over (order by pk), +lead(pk, 1) over (order by pk), +lead(pk, 2) over (order by pk), +lead(pk, 0) over (order by pk), +lead(pk, -1) over (order by pk), +lead(pk, -2) over (order by pk) +from t1 +order by pk asc; +pk lead(pk) over (order by pk) lead(pk, 1) over (order by pk) lead(pk, 2) over (order by pk) lead(pk, 0) over (order by pk) lead(pk, -1) over (order by pk) lead(pk, -2) over (order by pk) +1 2 2 3 1 NULL NULL +2 3 3 4 2 1 NULL +3 4 4 5 3 2 1 +4 5 5 6 4 3 2 +5 6 6 7 5 4 3 +6 7 7 8 6 5 4 +7 8 8 9 7 6 5 +8 9 9 10 8 7 6 +9 10 10 11 9 8 7 +10 11 11 NULL 10 9 8 +11 NULL NULL NULL 11 10 9 +select pk, +lag(pk) over (order by pk), +lag(pk, 1) over (order by pk), +lag(pk, 2) over (order by pk), +lag(pk, 0) over (order by pk), +lag(pk, -1) over (order by pk), +lag(pk, -2) over (order by pk) +from t1 +order by pk asc; +pk lag(pk) over (order by pk) lag(pk, 1) over (order by pk) lag(pk, 2) over (order by pk) lag(pk, 0) over (order by pk) lag(pk, -1) over (order by pk) lag(pk, -2) over (order by pk) +1 NULL NULL NULL 1 2 3 +2 1 1 NULL 2 3 4 +3 2 2 1 3 4 5 +4 3 3 2 4 5 6 +5 4 4 3 5 6 7 +6 5 5 4 6 7 8 +7 6 6 5 7 8 9 +8 7 7 6 8 9 10 +9 8 8 7 9 10 11 +10 9 9 8 10 11 NULL +11 10 10 9 11 NULL NULL +select pk, pk - 2, +lag(pk, pk - 2) over (order by pk), +lead(pk, pk - 2) over (order by pk) +from t1 +order by pk asc; +pk pk - 2 lag(pk, pk - 2) over (order by pk) lead(pk, pk - 2) over (order by pk) +1 -1 2 NULL +2 0 2 2 +3 1 2 4 +4 2 2 6 +5 3 2 8 +6 4 2 10 +7 5 2 NULL +8 6 2 NULL +9 7 2 NULL +10 8 2 NULL +11 9 2 NULL +select pk, pk - 2, +lag(pk, pk + 2) over (order by pk), +lead(pk, pk + 2) over (order by pk) +from t1 +order by pk asc; +pk pk - 2 lag(pk, pk + 2) over (order by pk) lead(pk, pk + 2) over (order by pk) +1 -1 NULL 4 +2 0 NULL 6 +3 1 NULL 8 +4 2 NULL 10 +5 3 NULL NULL +6 4 NULL NULL +7 5 NULL NULL +8 6 NULL NULL +9 7 NULL NULL +10 8 NULL NULL +11 9 NULL NULL +select pk, a, +lead(pk) over (partition by a order by pk), +lead(pk, 1) over (partition by a order by pk), +lead(pk, 2) over (partition by a order by pk), +lead(pk, 0) over (partition by a order by pk), +lead(pk, -1) over (partition by a order by pk), +lead(pk, -2) over (partition by a order by pk) +from t1 +order by pk asc; +pk a lead(pk) over (partition by a order by pk) lead(pk, 1) over (partition by a order by pk) lead(pk, 2) over (partition by a order by pk) lead(pk, 0) over (partition by a order by pk) lead(pk, -1) over (partition by a order by pk) lead(pk, -2) over (partition by a order by pk) +1 0 2 2 3 1 NULL NULL +2 0 3 3 NULL 2 1 NULL +3 0 NULL NULL NULL 3 2 1 +4 1 5 5 6 4 NULL NULL +5 1 6 6 NULL 5 4 NULL +6 1 NULL NULL NULL 6 5 4 +7 2 8 8 9 7 NULL NULL +8 2 9 9 10 8 7 NULL +9 2 10 10 11 9 8 7 +10 2 11 11 NULL 10 9 8 +11 2 NULL NULL NULL 11 10 9 +select pk, a, +lag(pk) over (partition by a order by pk), +lag(pk, 1) over (partition by a order by pk), +lag(pk, 2) over (partition by a order by pk), +lag(pk, 0) over (partition by a order by pk), +lag(pk, -1) over (partition by a order by pk), +lag(pk, -2) over (partition by a order by pk) +from t1 +order by pk asc; +pk a lag(pk) over (partition by a order by pk) lag(pk, 1) over (partition by a order by pk) lag(pk, 2) over (partition by a order by pk) lag(pk, 0) over (partition by a order by pk) lag(pk, -1) over (partition by a order by pk) lag(pk, -2) over (partition by a order by pk) +1 0 NULL NULL NULL 1 2 3 +2 0 1 1 NULL 2 3 NULL +3 0 2 2 1 3 NULL NULL +4 1 NULL NULL NULL 4 5 6 +5 1 4 4 NULL 5 6 NULL +6 1 5 5 4 6 NULL NULL +7 2 NULL NULL NULL 7 8 9 +8 2 7 7 NULL 8 9 10 +9 2 8 8 7 9 10 11 +10 2 9 9 8 10 11 NULL +11 2 10 10 9 11 NULL NULL +select pk, a, pk - 2, +lag(pk, pk - 2) over (partition by a order by pk), +lead(pk, pk - 2) over (partition by a order by pk), +lag(pk, a - 2) over (partition by a order by pk), +lead(pk, a - 2) over (partition by a order by pk) +from t1 +order by pk asc; +pk a pk - 2 lag(pk, pk - 2) over (partition by a order by pk) lead(pk, pk - 2) over (partition by a order by pk) lag(pk, a - 2) over (partition by a order by pk) lead(pk, a - 2) over (partition by a order by pk) +1 0 -1 2 NULL 3 NULL +2 0 0 2 2 NULL NULL +3 0 1 2 NULL NULL 1 +4 1 2 NULL 6 5 NULL +5 1 3 NULL NULL 6 4 +6 1 4 NULL NULL NULL 5 +7 2 5 NULL NULL 7 7 +8 2 6 NULL NULL 8 8 +9 2 7 NULL NULL 9 9 +10 2 8 NULL NULL 10 10 +11 2 9 NULL NULL 11 11 +select pk, a, pk - 2, +lag(pk, pk + 2) over (partition by a order by pk), +lead(pk, pk + 2) over (partition by a order by pk), +lag(pk, a + 2) over (partition by a order by pk), +lead(pk, a + 2) over (partition by a order by pk) +from t1 +order by pk asc; +pk a pk - 2 lag(pk, pk + 2) over (partition by a order by pk) lead(pk, pk + 2) over (partition by a order by pk) lag(pk, a + 2) over (partition by a order by pk) lead(pk, a + 2) over (partition by a order by pk) +1 0 -1 NULL NULL NULL 3 +2 0 0 NULL NULL NULL NULL +3 0 1 NULL NULL 1 NULL +4 1 2 NULL NULL NULL NULL +5 1 3 NULL NULL NULL NULL +6 1 4 NULL NULL NULL NULL +7 2 5 NULL NULL NULL 11 +8 2 6 NULL NULL NULL NULL +9 2 7 NULL NULL NULL NULL +10 2 8 NULL NULL NULL NULL +11 2 9 NULL NULL 7 NULL +select pk, a, b, c, d, e, +lag(a) over (partition by a order by pk), +lag(b) over (partition by a order by pk), +lag(c) over (partition by a order by pk), +lag(d) over (partition by a order by pk), +lag(e) over (partition by a order by pk) +from t1 +order by pk asc; +pk a b c d e lag(a) over (partition by a order by pk) lag(b) over (partition by a order by pk) lag(c) over (partition by a order by pk) lag(d) over (partition by a order by pk) lag(e) over (partition by a order by pk) +1 0 1 one 0.100 0.001 NULL NULL NULL NULL NULL +2 0 2 two 0.200 0.002 0 1 one 0.100 0.001 +3 0 3 three 0.300 0.003 0 2 two 0.200 0.002 +4 1 2 three 0.400 0.004 NULL NULL NULL NULL NULL +5 1 1 two 0.500 0.005 1 2 three 0.400 0.004 +6 1 1 one 0.600 0.006 1 1 two 0.500 0.005 +7 2 NULL n_one 0.500 0.007 NULL NULL NULL NULL NULL +8 2 1 n_two NULL 0.008 2 NULL n_one 0.500 0.007 +9 2 2 NULL 0.700 0.009 2 1 n_two NULL 0.008 +10 2 0 n_four 0.800 0.01 2 2 NULL 0.700 0.009 +11 2 10 NULL 0.900 NULL 2 0 n_four 0.800 0.01 +select pk, a, b, a+b, +lag(a + b) over (partition by a order by pk) +from t1 +order by pk asc; +pk a b a+b lag(a + b) over (partition by a order by pk) +1 0 1 1 NULL +2 0 2 2 1 +3 0 3 3 2 +4 1 2 3 NULL +5 1 1 2 3 +6 1 1 2 2 +7 2 NULL NULL NULL +8 2 1 3 NULL +9 2 2 4 3 +10 2 0 2 4 +11 2 10 12 2 +select pk, a, b, a+b, +lag(a + b) over (partition by a order by pk) + pk +from t1 +order by pk asc; +pk a b a+b lag(a + b) over (partition by a order by pk) + pk +1 0 1 1 NULL +2 0 2 2 3 +3 0 3 3 5 +4 1 2 3 NULL +5 1 1 2 8 +6 1 1 2 8 +7 2 NULL NULL NULL +8 2 1 3 NULL +9 2 2 4 12 +10 2 0 2 14 +11 2 10 12 13 +drop table t1; diff --git a/mysql-test/t/win_lead_lag.test b/mysql-test/t/win_lead_lag.test new file mode 100644 index 00000000000..2824f83789c --- /dev/null +++ b/mysql-test/t/win_lead_lag.test @@ -0,0 +1,110 @@ +create table t1 ( + pk int primary key, + a int, + b int, + c char(10), + d decimal(10, 3), + e real +); + +insert into t1 values +( 1, 0, 1, 'one', 0.1, 0.001), +( 2, 0, 2, 'two', 0.2, 0.002), +( 3, 0, 3, 'three', 0.3, 0.003), +( 4, 1, 2, 'three', 0.4, 0.004), +( 5, 1, 1, 'two', 0.5, 0.005), +( 6, 1, 1, 'one', 0.6, 0.006), +( 7, 2, NULL, 'n_one', 0.5, 0.007), +( 8, 2, 1, 'n_two', NULL, 0.008), +( 9, 2, 2, NULL, 0.7, 0.009), +(10, 2, 0, 'n_four', 0.8, 0.010), +(11, 2, 10, NULL, 0.9, NULL); + +select pk, + lead(pk) over (order by pk), + lead(pk, 1) over (order by pk), + lead(pk, 2) over (order by pk), + lead(pk, 0) over (order by pk), + lead(pk, -1) over (order by pk), + lead(pk, -2) over (order by pk) +from t1 +order by pk asc; + +select pk, + lag(pk) over (order by pk), + lag(pk, 1) over (order by pk), + lag(pk, 2) over (order by pk), + lag(pk, 0) over (order by pk), + lag(pk, -1) over (order by pk), + lag(pk, -2) over (order by pk) +from t1 +order by pk asc; + +select pk, pk - 2, + lag(pk, pk - 2) over (order by pk), + lead(pk, pk - 2) over (order by pk) +from t1 +order by pk asc; + +select pk, pk - 2, + lag(pk, pk + 2) over (order by pk), + lead(pk, pk + 2) over (order by pk) +from t1 +order by pk asc; + +select pk, a, + lead(pk) over (partition by a order by pk), + lead(pk, 1) over (partition by a order by pk), + lead(pk, 2) over (partition by a order by pk), + lead(pk, 0) over (partition by a order by pk), + lead(pk, -1) over (partition by a order by pk), + lead(pk, -2) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, + lag(pk) over (partition by a order by pk), + lag(pk, 1) over (partition by a order by pk), + lag(pk, 2) over (partition by a order by pk), + lag(pk, 0) over (partition by a order by pk), + lag(pk, -1) over (partition by a order by pk), + lag(pk, -2) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, pk - 2, + lag(pk, pk - 2) over (partition by a order by pk), + lead(pk, pk - 2) over (partition by a order by pk), + lag(pk, a - 2) over (partition by a order by pk), + lead(pk, a - 2) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, pk - 2, + lag(pk, pk + 2) over (partition by a order by pk), + lead(pk, pk + 2) over (partition by a order by pk), + lag(pk, a + 2) over (partition by a order by pk), + lead(pk, a + 2) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, b, c, d, e, + lag(a) over (partition by a order by pk), + lag(b) over (partition by a order by pk), + lag(c) over (partition by a order by pk), + lag(d) over (partition by a order by pk), + lag(e) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, b, a+b, + lag(a + b) over (partition by a order by pk) +from t1 +order by pk asc; + +select pk, a, b, a+b, + lag(a + b) over (partition by a order by pk) + pk +from t1 +order by pk asc; + +drop table t1; diff --git a/sql/item_sum.h b/sql/item_sum.h index d7d583a6fed..b9075db0196 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -350,7 +350,8 @@ public: AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC, ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC, - CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC + CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC, + NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC }; Item **ref_by; /* pointer to a ref to the object used to register it */ diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 5a7ee522d44..ccdbdd3294f 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -229,16 +229,220 @@ bool Item_sum_first_value::add() For this usecase we can actually get rid of arg_cache. arg_cache is just for running a comparison function. */ value_added= true; - arg_cache->cache_value(); - value->store(arg_cache); - null_value= arg_cache->null_value; + Item_sum_hybrid_simple::add(); return false; } -bool Item_sum_last_value::add() +bool Item_sum_hybrid_simple::fix_fields(THD *thd, Item **ref) { - arg_cache->cache_value(); - value->store(arg_cache); - null_value= arg_cache->null_value; + DBUG_ASSERT(fixed == 0); + + if (init_sum_func_check(thd)) + return TRUE; + + for (uint i= 0; i < arg_count; i++) + { + Item *item= args[i]; + // 'item' can be changed during fix_fields + if ((!item->fixed && item->fix_fields(thd, args)) || + (item= args[i])->check_cols(1)) + return TRUE; + } + Type_std_attributes::set(args[0]); + for (uint i= 0; i < arg_count && !with_subselect; i++) + with_subselect= with_subselect || args[i]->with_subselect; + + Item *item2= args[0]->real_item(); + if (item2->type() == Item::FIELD_ITEM) + set_handler_by_field_type(((Item_field*) item2)->field->type()); + else if (args[0]->cmp_type() == TIME_RESULT) + set_handler_by_field_type(item2->field_type()); + else + set_handler_by_result_type(item2->result_type(), + max_length, collation.collation); + + switch (Item_sum_hybrid_simple::result_type()) { + case INT_RESULT: + case DECIMAL_RESULT: + case STRING_RESULT: + break; + case REAL_RESULT: + max_length= float_length(decimals); + break; + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); // XXX(cvicentiu) Should this never happen? + return TRUE; + }; + setup_hybrid(thd, args[0]); + /* MIN/MAX can return NULL for empty set indepedent of the used column */ + maybe_null= 1; + result_field=0; + null_value=1; + fix_length_and_dec(); + + if (check_sum_func(thd, ref)) + return TRUE; + for (uint i= 0; i < arg_count; i++) + { + orig_args[i]= args[i]; + } + fixed= 1; + return FALSE; +} + +bool Item_sum_hybrid_simple::add() +{ + value->store(args[0]); + value->cache_value(); + null_value= value->null_value; return false; } + +void Item_sum_hybrid_simple::setup_hybrid(THD *thd, Item *item) +{ + if (!(value= Item_cache::get_cache(thd, item, item->cmp_type()))) + return; + value->setup(thd, item); + value->store(item); + if (!item->const_item()) + value->set_used_tables(RAND_TABLE_BIT); + collation.set(item->collation); +} + +double Item_sum_hybrid_simple::val_real() +{ + DBUG_ASSERT(fixed == 1); + if (null_value) + return 0.0; + double retval= value->val_real(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0.0); + return retval; +} + +longlong Item_sum_hybrid_simple::val_int() +{ + DBUG_ASSERT(fixed == 1); + if (null_value) + return 0; + longlong retval= value->val_int(); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == 0); + return retval; +} + +my_decimal *Item_sum_hybrid_simple::val_decimal(my_decimal *val) +{ + DBUG_ASSERT(fixed == 1); + if (null_value) + return 0; + my_decimal *retval= value->val_decimal(val); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + return retval; +} + +String * +Item_sum_hybrid_simple::val_str(String *str) +{ + DBUG_ASSERT(fixed == 1); + if (null_value) + return 0; + String *retval= value->val_str(str); + if ((null_value= value->null_value)) + DBUG_ASSERT(retval == NULL); + return retval; +} + +Field *Item_sum_hybrid_simple::create_tmp_field(bool group, TABLE *table) +{ + DBUG_ASSERT(0); + return NULL; +} + +void Item_sum_hybrid_simple::reset_field() +{ + switch(Item_sum_hybrid_simple::result_type()) { + case STRING_RESULT: + { + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),result_field->charset()),*res; + + res=args[0]->val_str(&tmp); + if (args[0]->null_value) + { + result_field->set_null(); + result_field->reset(); + } + else + { + result_field->set_notnull(); + result_field->store(res->ptr(),res->length(),tmp.charset()); + } + break; + } + case INT_RESULT: + { + longlong nr=args[0]->val_int(); + + if (maybe_null) + { + if (args[0]->null_value) + { + nr=0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + result_field->store(nr, unsigned_flag); + break; + } + case REAL_RESULT: + { + double nr= args[0]->val_real(); + + if (maybe_null) + { + if (args[0]->null_value) + { + nr=0.0; + result_field->set_null(); + } + else + result_field->set_notnull(); + } + result_field->store(nr); + break; + } + case DECIMAL_RESULT: + { + my_decimal value_buff, *arg_dec= args[0]->val_decimal(&value_buff); + + if (maybe_null) + { + if (args[0]->null_value) + result_field->set_null(); + else + result_field->set_notnull(); + } + /* + We must store zero in the field as we will use the field value in + add() + */ + if (!arg_dec) // Null + arg_dec= &decimal_zero; + result_field->store_decimal(arg_dec); + break; + } + case ROW_RESULT: + case TIME_RESULT: + DBUG_ASSERT(0); + } +} + +void Item_sum_hybrid_simple::update_field() +{ + DBUG_ASSERT(0); +} diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index cf484e3129d..b3d0b7a15f3 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -279,23 +279,59 @@ class Item_sum_dense_rank: public Item_sum_int { return get_item_copy(thd, mem_root, this); } }; +class Item_sum_hybrid_simple : public Item_sum, + public Type_handler_hybrid_field_type +{ + public: + Item_sum_hybrid_simple(THD *thd, Item *arg): + Item_sum(thd, arg), + Type_handler_hybrid_field_type(MYSQL_TYPE_LONGLONG), + value(NULL) + { collation.set(&my_charset_bin); } + + Item_sum_hybrid_simple(THD *thd, Item *arg1, Item *arg2): + Item_sum(thd, arg1, arg2), + Type_handler_hybrid_field_type(MYSQL_TYPE_LONGLONG), + value(NULL) + { collation.set(&my_charset_bin); } + + bool add(); + bool fix_fields(THD *, Item **); + void setup_hybrid(THD *thd, Item *item); + double val_real(); + longlong val_int(); + my_decimal *val_decimal(my_decimal *); + void reset_field(); + String *val_str(String *); + /* TODO(cvicentiu) copied from Item_sum_hybrid, what does it do? */ + bool keep_field_type(void) const { return 1; } + enum Item_result result_type() const + { return Type_handler_hybrid_field_type::result_type(); } + enum Item_result cmp_type() const + { return Type_handler_hybrid_field_type::cmp_type(); } + enum enum_field_types field_type() const + { return Type_handler_hybrid_field_type::field_type(); } + void update_field(); + Field *create_tmp_field(bool group, TABLE *table); + void clear() + { + value->clear(); + null_value= 1; + } + + private: + Item_cache *value; +}; + /* This item will remember the first value added to it. It will not update the value unless it is cleared. - - TODO(cvicentiu) Item_sum_hybrid is a pretty heavyweight class. It holds - logic that allows comparing values. It was generally thought out for MIN/MAX - functions, but we can use it here as well. - Refactor Item_sum_hybrid to only include basic field handling and - make a more specialized class for min/max. It might be useful if we'd like - to optimize how min/max is computed as a window function. We can potentially - implement a PQ within the specialized class to support removal. */ -class Item_sum_first_value : public Item_sum_hybrid +class Item_sum_first_value : public Item_sum_hybrid_simple { public: Item_sum_first_value(THD* thd, Item* arg_expr) : - Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */), + Item_sum_hybrid_simple(thd, arg_expr), value_added(false) {} bool add(); @@ -303,7 +339,7 @@ class Item_sum_first_value : public Item_sum_hybrid void clear() { value_added= false; - Item_sum_hybrid::clear(); + Item_sum_hybrid_simple::clear(); } enum Sumfunctype sum_func () const @@ -329,14 +365,13 @@ class Item_sum_first_value : public Item_sum_hybrid This item does not support removal, and can be cleared only by calling clear(). */ -class Item_sum_last_value : public Item_sum_hybrid +class Item_sum_last_value : public Item_sum_hybrid_simple { public: Item_sum_last_value(THD* thd, Item* arg_expr) : - Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */) {} + Item_sum_hybrid_simple(thd, arg_expr) {} - bool add(); - enum Sumfunctype sum_func () const + enum Sumfunctype sum_func() const { return LAST_VALUE_FUNC; } @@ -350,6 +385,91 @@ class Item_sum_last_value : public Item_sum_hybrid { return get_item_copy(thd, mem_root, this); } }; +class Item_sum_nth_value : public Item_sum_last_value +{ + public: + Item_sum_nth_value(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_last_value(thd, arg_expr) { + /* TODO(cvicentiu) This is messy. Item_args starts with 2 args by chance. + Clean this up by pulling out the common code from Item_sum_hybrid! */ + arg_count= 2; + args[1]= offset_expr; + } + + bool fix_fields(THD *thd, Item **ref) + { + Item *offset= args[1]; + if (offset->fix_fields(thd, args)) + return true; + /* Fix fields for the second argument as well. */ + orig_args[1]= offset; + /* Item_sum_last_value fixes fields for first argument only. */ + if (Item_sum_last_value::fix_fields(thd, ref)) + return true; + + return false; + } + + bool add() + { + Item_sum_last_value::add(); + return false; + } + + enum Sumfunctype sum_func() const + { + return NTH_VALUE_FUNC; + } + + const char*func_name() const + { + return "nth_value"; + } + + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy(thd, mem_root, this); } +}; + +class Item_sum_lead : public Item_sum_hybrid_simple +{ + public: + Item_sum_lead(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {} + + enum Sumfunctype sum_func() const + { + return LEAD_FUNC; + } + + const char*func_name() const + { + return "lead"; + } + + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy(thd, mem_root, this); } +}; + +class Item_sum_lag : public Item_sum_hybrid_simple +{ + public: + Item_sum_lag(THD *thd, Item *arg_expr, Item* offset_expr) : + Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {} + + enum Sumfunctype sum_func() const + { + return LAG_FUNC; + } + + const char*func_name() const + { + return "lag"; + } + + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy(thd, mem_root, this); } +}; + /* A base window function (aggregate) that also holds a counter for the number of rows. @@ -668,6 +788,8 @@ public: switch (window_func()->sum_func()) { case Item_sum::FIRST_VALUE_FUNC: case Item_sum::LAST_VALUE_FUNC: + case Item_sum::LAG_FUNC: + case Item_sum::LEAD_FUNC: return true; default: return false; diff --git a/sql/lex.h b/sql/lex.h index 90dac2eb294..003e88d2d9d 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -697,11 +697,14 @@ static SYMBOL sql_functions[] = { { "EXTRACT", SYM(EXTRACT_SYM)}, { "FIRST_VALUE", SYM(FIRST_VALUE_SYM)}, { "GROUP_CONCAT", SYM(GROUP_CONCAT_SYM)}, + { "LAG", SYM(LAG_SYM)}, + { "LEAD", SYM(LEAD_SYM)}, { "MAX", SYM(MAX_SYM)}, { "MID", SYM(SUBSTRING)}, /* unireg function */ { "MIN", SYM(MIN_SYM)}, { "NOW", SYM(NOW_SYM)}, - { "NTILE", SYM(NTILE_SYM)}, + { "NTH_VALUE", SYM(NTH_VALUE_SYM)}, + { "NTILE", SYM(NTILE_SYM)}, { "POSITION", SYM(POSITION_SYM)}, { "PERCENT_RANK", SYM(PERCENT_RANK_SYM)}, { "RANK", SYM(RANK_SYM)}, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 761f6c4b130..1f79010e993 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -26243,7 +26243,15 @@ AGGR_OP::end_send() corresponding temp table fields. Do this for each row in the table. */ if (join_tab->window_funcs_step) - copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd); + { + Item **func_ptr= join_tab->tmp_table_param->items_to_copy; + Item *func; + for (; (func = *func_ptr) ; func_ptr++) + { + if (func->with_window_func) + func->save_in_result_field(true); + } + } rc= evaluate_join_record(join, join_tab, 0); } } diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 15045924e83..1a111e52c75 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -273,7 +273,7 @@ int compare_order_lists(SQL_I_List *part_list1, return CMP_GT_C; if (elem2) return CMP_LT_C; - return CMP_EQ; + return CMP_EQ; } @@ -686,7 +686,17 @@ public: if ((res= Table_read_cursor::next()) || (res= fetch())) + { + /* TODO(cvicentiu) This does not consider table read failures. + Perhaps assuming end of table like this is fine in that case. */ + + /* This row is the final row in the table. To maintain semantics + that cursors always point to the last valid row, move back one step, + but mark end_of_partition as true. */ + Table_read_cursor::prev(); + end_of_partition= true; return res; + } if (bound_tracker.compare_with_cache()) { @@ -1886,19 +1896,25 @@ private: /* A cursor that follows a target cursor. Each time a new row is added, the window functions are cleared and only have the row at which the target is point at added to them. + + The window functions are cleared if the bounds or the position cursors are + outside computational bounds. */ class Frame_positional_cursor : public Frame_cursor { public: Frame_positional_cursor(const Frame_cursor &position_cursor) : - position_cursor(position_cursor), bound(NULL), offset(NULL), + position_cursor(position_cursor), top_bound(NULL), + bottom_bound(NULL), offset(NULL), overflowed(false), negative_offset(false) {} Frame_positional_cursor(const Frame_cursor &position_cursor, - const Frame_cursor &bound, + const Frame_cursor &top_bound, + const Frame_cursor &bottom_bound, Item &offset, bool negative_offset) : - position_cursor(position_cursor), bound(&bound), offset(&offset), + position_cursor(position_cursor), top_bound(&top_bound), + bottom_bound(&bottom_bound), offset(&offset), negative_offset(negative_offset) {} void init(READ_RECORD *info) @@ -1908,35 +1924,26 @@ class Frame_positional_cursor : public Frame_cursor void pre_next_partition(ha_rows rownum) { - clear_sum_functions(); + /* The offset is dependant on the current row values. We can only get + * it here accurately. When fetching other rows, it changes. */ + save_offset_value(); } void next_partition(ha_rows rownum) { - ha_rows position= get_current_position(); - if (position_is_within_bounds(position)) - { - cursor.move_to(position); - cursor.fetch(); - add_value_to_items(); - } + save_positional_value(); } void pre_next_row() { + /* The offset is dependant on the current row values. We can only get + * it here accurately. When fetching other rows, it changes. */ + save_offset_value(); } void next_row() { - ha_rows position= get_current_position(); - if (!position_is_within_bounds(position)) - clear_sum_functions(); - else - { - cursor.move_to(position_cursor.get_curr_rownum()); - cursor.fetch(); - add_value_to_items(); - } + save_positional_value(); } ha_rows get_curr_rownum() const @@ -1947,30 +1954,25 @@ class Frame_positional_cursor : public Frame_cursor private: /* Check if a our position is within bounds. * The position is passed as a parameter to avoid recalculating it. */ - bool position_is_within_bounds(ha_rows position) + bool position_is_within_bounds() { if (!offset) return !position_cursor.is_outside_computation_bounds(); - /* No valid bound to compare to. */ - if (position_cursor.is_outside_computation_bounds() || - bound->is_outside_computation_bounds()) + if (overflowed) return false; - if (negative_offset) - { - if (position_cursor.get_curr_rownum() < position) - return false; /* Overflow below 0. */ - if (position < bound->get_curr_rownum()) /* We are over the bound. */ - return false; - } - else - { - if (position_cursor.get_curr_rownum() > position) - return false; /* Overflow over MAX_HA_ROWS. */ - if (position > bound->get_curr_rownum()) /* We are over the bound. */ - return false; - } + /* No valid bound to compare to. */ + if (position_cursor.is_outside_computation_bounds() || + top_bound->is_outside_computation_bounds() || + bottom_bound->is_outside_computation_bounds()) + return false; + + /* We are over the bound. */ + if (position < top_bound->get_curr_rownum()) + return false; + if (position > bottom_bound->get_curr_rownum()) + return false; return true; } @@ -1978,18 +1980,55 @@ private: /* Get the current position, accounting for the offset value, if present. NOTE: This function does not check over/underflow. */ - ha_rows get_current_position() + void get_current_position() { - ha_rows position = position_cursor.get_curr_rownum(); + position = position_cursor.get_curr_rownum(); + overflowed= false; if (offset) - position += offset->val_int() * (negative_offset ? -1 : 1); - return position; + { + if (offset_value < 0 && + position + offset_value > position) + { + overflowed= true; + } + if (offset_value > 0 && + position + offset_value < position) + { + overflowed= true; + } + position += offset_value; + } + } + + void save_offset_value() + { + if (offset) + offset_value= offset->val_int() * (negative_offset ? -1 : 1); + else + offset_value= 0; + } + + void save_positional_value() + { + get_current_position(); + if (!position_is_within_bounds()) + clear_sum_functions(); + else + { + cursor.move_to(position); + cursor.fetch(); + add_value_to_items(); + } } const Frame_cursor &position_cursor; - const Frame_cursor *bound; + const Frame_cursor *top_bound; + const Frame_cursor *bottom_bound; Item *offset; Table_read_cursor cursor; + ha_rows position; + longlong offset_value; + bool overflowed; bool negative_offset; }; @@ -2107,6 +2146,7 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager, { Window_spec *spec= window_func->window_spec; Item_sum *item_sum= window_func->window_func(); + DBUG_PRINT("info", ("Get arg count: %d", item_sum->get_arg_count())); Frame_cursor *fc; switch (item_sum->sum_func()) { @@ -2135,6 +2175,44 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager, fc->add_sum_func(item_sum); cursor_manager->add_cursor(fc); break; + case Item_sum::LEAD_FUNC: + case Item_sum::LAG_FUNC: + { + Frame_cursor *bottom_bound= new Frame_unbounded_following(thd, + spec->partition_list, + spec->order_list); + Frame_cursor *top_bound= new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + Frame_cursor *current_row_pos= new Frame_rows_current_row_bottom; + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + cursor_manager->add_cursor(current_row_pos); + DBUG_ASSERT(item_sum->fixed); + bool negative_offset= item_sum->sum_func() == Item_sum::LAG_FUNC; + fc= new Frame_positional_cursor(*current_row_pos, + *top_bound, *bottom_bound, + *item_sum->get_arg(1), + negative_offset); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } + case Item_sum::NTH_VALUE_FUNC: + { + Frame_cursor *bottom_bound= get_frame_cursor(thd, spec, false); + Frame_cursor *top_bound= get_frame_cursor(thd, spec, true); + cursor_manager->add_cursor(bottom_bound); + cursor_manager->add_cursor(top_bound); + DBUG_ASSERT(item_sum->fixed); + fc= new Frame_positional_cursor(*top_bound, + *top_bound, *bottom_bound, + *item_sum->get_arg(1), + false); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } default: fc= new Frame_unbounded_preceding( thd, spec->partition_list, spec->order_list); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 0d6bccf159c..b833ade350f 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1245,7 +1245,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FAULTS_SYM %token FETCH_SYM /* SQL-2003-R */ %token FILE_SYM -%token FIRST_VALUE_SYM /* SQL-2012 */ +%token FIRST_VALUE_SYM /* SQL-2011 */ %token FIRST_SYM /* SQL-2003-N */ %token FIXED_SYM %token FLOAT_NUM @@ -1273,6 +1273,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token GRANTS %token GROUP_SYM /* SQL-2003-R */ %token GROUP_CONCAT_SYM +%token LAG_SYM /* SQL-2011 */ +%token LEAD_SYM /* SQL-2011 */ %token HANDLER_SYM %token HARD_SYM %token HASH_SYM @@ -1431,6 +1433,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NUM %token NUMBER_SYM /* SQL-2003-N */ %token NUMERIC_SYM /* SQL-2003-R */ +%token NTH_VALUE_SYM /* SQL-2011 */ %token NVARCHAR_SYM %token OFFSET_SYM %token OLD_PASSWORD_SYM @@ -10518,7 +10521,50 @@ simple_window_func: if ($$ == NULL) MYSQL_YYABORT; } - ; + | + NTH_VALUE_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_nth_value(thd, $3, $5); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + LEAD_SYM '(' expr ')' + { + /* No second argument defaults to 1. */ + Item* item_offset= new (thd->mem_root) Item_uint(thd, 1); + if (item_offset == NULL) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_sum_lead(thd, $3, item_offset); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + LEAD_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_lead(thd, $3, $5); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + LAG_SYM '(' expr ')' + { + /* No second argument defaults to 1. */ + Item* item_offset= new (thd->mem_root) Item_uint(thd, 1); + if (item_offset == NULL) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_sum_lag(thd, $3, item_offset); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + LAG_SYM '(' expr ',' expr ')' + { + $$= new (thd->mem_root) Item_sum_lag(thd, $3, $5); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; window_name: ident