From 925e508a2e76c39173bb058ad8891202ce895d8f Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Sat, 19 Jun 2010 15:40:19 +0400 Subject: [PATCH 1/5] MWL#121: DS-MRR support for clustered primary keys - First code (will need code cleanup) --- sql/handler.h | 5 +- sql/multi_range_read.cc | 329 ++++++++++++++++++++++------ sql/multi_range_read.h | 20 +- sql/opt_range.cc | 2 + sql/sql_join_cache.cc | 3 +- sql/sql_select.cc | 3 +- storage/maria/ha_maria.cc | 11 +- storage/maria/ha_maria.h | 6 +- storage/myisam/ha_myisam.cc | 10 +- storage/myisam/ha_myisam.h | 6 +- storage/xtradb/handler/ha_innodb.cc | 12 +- storage/xtradb/handler/ha_innodb.h | 6 +- 12 files changed, 319 insertions(+), 94 deletions(-) diff --git a/sql/handler.h b/sql/handler.h index 124bd40711d..f2cc50de38a 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1752,9 +1752,10 @@ public: uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, COST_VECT *cost); + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost); virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, + uint n_ranges, uint key_parts, uint mode, HANDLER_BUFFER *buf); virtual int multi_range_read_next(char **range_info); virtual int read_range_first(const key_range *start_key, diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 644634c3d74..e0bccb9bf90 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -136,10 +136,11 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, */ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, - uint *bufsz, uint *flags, COST_VECT *cost) + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost) { *bufsz= 0; /* Default implementation doesn't need a buffer */ - + //psergey2-todo: assert for singlepoint ranges here? 
*flags |= HA_MRR_USE_DEFAULT_IMPL; cost->zero(); @@ -197,7 +198,8 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, int handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf) + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf) { DBUG_ENTER("handler::multi_range_read_init"); mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); @@ -299,7 +301,8 @@ scan_it_again: */ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, - void *seq_init_param, uint n_ranges, uint mode, + void *seq_init_param, uint n_ranges, uint key_parts, + uint mode, HANDLER_BUFFER *buf) { uint elem_size; @@ -317,7 +320,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, use_default_impl= TRUE; const int retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param, - n_ranges, mode, buf); + n_ranges, key_parts, mode, buf); DBUG_RETURN(retval); } rowids_buf= buf->buffer; @@ -328,13 +331,33 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); rowids_buf_end= buf->buffer_end; - elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + + + doing_cpk_scan= check_cpk_scan(h->active_index, mode); + if (doing_cpk_scan) + { + uint keylen=0; + DBUG_ASSERT(key_parts != 0); + //psergey2-todo: new elem_size here + for (uint kp= 0; kp < key_parts; kp++) + keylen += table->key_info[h->active_index].key_part[kp].store_length; + + cpk_tuple_length= keylen; + cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts == + key_parts); + cpk_have_range= FALSE; + elem_size= keylen + (int)is_mrr_assoc * sizeof(void*); + use_default_impl= FALSE; + } + else + elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + rowids_buf_last= rowids_buf + ((rowids_buf_end - rowids_buf)/ elem_size)* elem_size; rowids_buf_end= rowids_buf_last; - /* + /* 
There can be two cases: - This is the first call since index_init(), h2==NULL Need to setup h2 then. @@ -342,72 +365,88 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, The caller might have called h->index_init(), need to switch h to rnd_pos calls. */ - if (!h2) + //psergey2-todo: don't create secondary for CPK scan. + if (!doing_cpk_scan) { - /* Create a separate handler object to do rndpos() calls. */ - THD *thd= current_thd; - /* - ::clone() takes up a lot of stack, especially on 64 bit platforms. - The constant 5 is an empiric result. - */ - if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) - DBUG_RETURN(1); - DBUG_ASSERT(h->active_index != MAX_KEY); - uint mrr_keyno= h->active_index; - - /* Create a separate handler object to do rndpos() calls. */ - if (!(new_h2= h->clone(thd->mem_root)) || - new_h2->ha_external_lock(thd, F_RDLCK)) + if (!h2) { - delete new_h2; - DBUG_RETURN(1); + /* Create a separate handler object to do rndpos() calls. */ + THD *thd= current_thd; + /* + ::clone() takes up a lot of stack, especially on 64 bit platforms. + The constant 5 is an empiric result. + */ + if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) + DBUG_RETURN(1); + DBUG_ASSERT(h->active_index != MAX_KEY); + uint mrr_keyno= h->active_index; + + /* Create a separate handler object to do rndpos() calls. */ + if (!(new_h2= h->clone(thd->mem_root)) || + new_h2->ha_external_lock(thd, F_RDLCK)) + { + delete new_h2; + DBUG_RETURN(1); + } + + if (mrr_keyno == h->pushed_idx_cond_keyno) + pushed_cond= h->pushed_idx_cond; + + /* + Caution: this call will invoke this->dsmrr_close(). Do not put the + created secondary table handler into this->h2 or it will delete it. 
+ */ + if (h->ha_index_end()) + { + h2=new_h2; + goto error; + } + + h2= new_h2; /* Ok, now can put it into h2 */ + table->prepare_for_position(); + h2->extra(HA_EXTRA_KEYREAD); + + if (h2->ha_index_init(mrr_keyno, FALSE)) + goto error; + + use_default_impl= FALSE; + if (pushed_cond) + h2->idx_cond_push(mrr_keyno, pushed_cond); } - - if (mrr_keyno == h->pushed_idx_cond_keyno) - pushed_cond= h->pushed_idx_cond; - - /* - Caution: this call will invoke this->dsmrr_close(). Do not put the - created secondary table handler into this->h2 or it will delete it. - */ - if (h->ha_index_end()) + else { - h2=new_h2; - goto error; + /* + We get here when the access alternates betwen MRR scan(s) and non-MRR + scans. + + Calling h->index_end() will invoke dsmrr_close() for this object, + which will delete h2. We need to keep it, so save put it away and dont + let it be deleted: + */ + handler *save_h2= h2; + h2= NULL; + int res= (h->inited == handler::INDEX && h->ha_index_end()); + h2= save_h2; + use_default_impl= FALSE; + if (res) + goto error; } - - h2= new_h2; /* Ok, now can put it into h2 */ - table->prepare_for_position(); - h2->extra(HA_EXTRA_KEYREAD); - - if (h2->ha_index_init(mrr_keyno, FALSE)) - goto error; - - use_default_impl= FALSE; - if (pushed_cond) - h2->idx_cond_push(mrr_keyno, pushed_cond); } else { - /* - We get here when the access alternates betwen MRR scan(s) and non-MRR - scans. - - Calling h->index_end() will invoke dsmrr_close() for this object, - which will delete h2. 
We need to keep it, so save put it away and dont - let it be deleted: - */ - handler *save_h2= h2; - h2= NULL; - int res= (h->inited == handler::INDEX && h->ha_index_end()); - h2= save_h2; - use_default_impl= FALSE; - if (res) - goto error; + //doing DS-MRR/CPK + // fill-buffer-analog + // eof + h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); + h->mrr_funcs= *seq_funcs; + dsmrr_fill_buffer_cpk(); + if (dsmrr_eof) + buf->end_of_used_area= rowids_buf_last; + DBUG_RETURN(0); // nothing can go wrong while filling the buffer } if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, - mode, buf) || + key_parts, mode, buf) || dsmrr_fill_buffer()) { goto error; @@ -524,6 +563,149 @@ int DsMrr_impl::dsmrr_fill_buffer() } +/* qsort-compatible function to compare key tuples */ +int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) +{ + DsMrr_impl *dsmrr= (DsMrr_impl*)arg; + TABLE *table= dsmrr->h->table; + + KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part; + KEY_PART_INFO *part_end= part + dsmrr->cpk_n_parts; + + //uint32 *lengths=item->field_lengths; + for (; part < part_end; ++part) + { + Field* f = part->field; + int len = part->store_length; + int res = f->cmp(key1, key2); + if (res) + return res; + key1 += len; + key2 += len; + } + return 0; +} + + +//psergey2: +int DsMrr_impl::dsmrr_fill_buffer_cpk() +{ + int res; + KEY_MULTI_RANGE cur_range; + DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk"); + + rowids_buf_cur= rowids_buf; + while ((rowids_buf_cur < rowids_buf_end) && + !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range))) + { + DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); + DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length); + + /* Put key, or {key, range_id} pair into the buffer */ + memcpy(rowids_buf_cur, cur_range.start_key.key, cpk_tuple_length); + rowids_buf_cur += cpk_tuple_length; + + if (is_mrr_assoc) + { + memcpy(rowids_buf_cur, &cur_range.ptr, sizeof(void*)); + rowids_buf_cur += 
sizeof(void*); + } + } + + dsmrr_eof= test(res); + + /* Sort the buffer contents by rowid */ + uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*); + uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; + + my_qsort2(rowids_buf, n_rowids, elem_size, + (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this); + rowids_buf_last= rowids_buf_cur; + rowids_buf_cur= rowids_buf; + DBUG_RETURN(0); +} + + +/* + CPK: so, the source is + - buffer exhaustion/re-fill + - advance to next range on "record-not-found" error. + - if scanning on a prefix, enumerate all records for a key. +*/ +int DsMrr_impl::dsmrr_next_cpk(char **range_info) +{ + int res; + + if (cpk_have_range) + { + res= h->index_next_same(table->record[0], rowids_buf_cur, cpk_tuple_length); + if (res != HA_ERR_END_OF_FILE) + { + // todo + if (is_mrr_assoc) + memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); + return res; + } + /* + Ok, we got EOF for records in this range. Fall through to get to another + range. + */ + } + + do + { + /* First, make sure we have a range at start of the buffer*/ + if (rowids_buf_cur == rowids_buf_last) + { + if (dsmrr_eof) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + // TODO: the return values are mix of HA_ERR_ codes and TRUE as "generic + // failure" error. Is this ok? + if ((res= dsmrr_fill_buffer_cpk())) + goto end; + } + + if (rowids_buf_cur == rowids_buf_last) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + + //TODO: skip-record calls here? + //if (h2->mrr_funcs.skip_record && + // h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) + // continue; + + /* Ok, got the range. Try making a lookup. 
*/ + uchar *lookup_tuple= rowids_buf_cur; + rowids_buf_cur += cpk_tuple_length; + if (is_mrr_assoc) + { + memcpy(cpk_saved_range_info, rowids_buf_cur, sizeof(void*)); + rowids_buf_cur += sizeof(void*) * test(is_mrr_assoc); + } + + res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, + HA_READ_KEY_EXACT); + + if (res && res != HA_ERR_END_OF_FILE) + goto end; + + if (!res) + { + memcpy(range_info, cpk_saved_range_info, sizeof(void*)); + cpk_have_range= !cpk_is_unique_scan; + break; + } + } while (true); + +end: + return res; +} + /** DS-MRR implementation: multi_range_read_next() function */ @@ -536,6 +718,9 @@ int DsMrr_impl::dsmrr_next(char **range_info) if (use_default_impl) return h->handler::multi_range_read_next(range_info); + + if (doing_cpk_scan) + return dsmrr_next_cpk(range_info); do { @@ -582,7 +767,8 @@ end: /** DS-MRR implementation: multi_range_read_info() function */ -ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, +ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost) { ha_rows res; @@ -590,8 +776,8 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint def_bufsz= *bufsz; /* Get cost/flags/mem_usage of default MRR implementation */ - res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz, - &def_flags, cost); + res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts, + &def_bufsz, &def_flags, cost); DBUG_ASSERT(!res); if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || @@ -705,6 +891,13 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) @retval TRUE Default MRR implementation should be used @retval FALSE DS-MRR implementation should be used */ +bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags) +{ + return test((mrr_flags & HA_MRR_SINGLE_POINT) && + !(mrr_flags & HA_MRR_SORTED) && + keyno == table->s->primary_key && + h->primary_key_is_clustered()); +} bool 
DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, COST_VECT *cost) @@ -712,8 +905,12 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, COST_VECT dsmrr_cost; bool res; THD *thd= current_thd; + //psergey2: check the criteria. + doing_cpk_scan= check_cpk_scan(keyno, *flags); + if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || - (keyno == table->s->primary_key && h->primary_key_is_clustered()) || + (keyno == table->s->primary_key && h->primary_key_is_clustered() && + !doing_cpk_scan) || key_uses_partial_cols(table, keyno)) { /* Use the default implementation */ diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h index 90e2e4c93d6..b379d4f517d 100644 --- a/sql/multi_range_read.h +++ b/sql/multi_range_read.h @@ -43,6 +43,17 @@ private: bool is_mrr_assoc; bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ + + bool doing_cpk_scan; + uint cpk_tuple_length; + uint cpk_n_parts; + bool cpk_is_unique_scan; + char *cpk_saved_range_info; + bool cpk_have_range; + + + bool check_cpk_scan(uint keyno, uint mrr_flags); + static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); public: void init(handler *h_arg, TABLE *table_arg) { @@ -50,13 +61,16 @@ public: table= table_arg; } int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf); void dsmrr_close(); int dsmrr_fill_buffer(); + int dsmrr_fill_buffer_cpk(); int dsmrr_next(char **range_info); + int dsmrr_next_cpk(char **range_info); - ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz, - uint *flags, COST_VECT *cost); + ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, + uint *bufsz, uint *flags, COST_VECT *cost); ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, diff --git 
a/sql/opt_range.cc b/sql/opt_range.cc index 27ecdea9568..25c4259295f 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8006,6 +8006,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, quick->mrr_buf_size= thd->variables.mrr_buff_size; if (table->file->multi_range_read_info(quick->index, 1, (uint)records, + uint(-1), &quick->mrr_buf_size, &quick->mrr_flags, &cost)) goto err; @@ -8367,6 +8368,7 @@ int QUICK_RANGE_SELECT::reset() RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0}; error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements, + uint(-1), mrr_flags, mrr_buf_desc? mrr_buf_desc: &empty_buf); DBUG_RETURN(error); diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index d88cc7a9f7f..120b109d8ff 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -2377,7 +2377,8 @@ JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges) if (!file->inited) file->ha_index_init(join_tab->ref.key, 1); if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges, - mrr_mode, &mrr_buff))) + join_tab->ref.key_parts, + mrr_mode, &mrr_buff))) rc= error < 0 ? 
NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; return rc; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8a8952550c0..1c1b054a2ea 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7318,10 +7318,11 @@ uint check_join_cache_usage(JOIN_TAB *tab, case JT_EQ_REF: if (cache_level <= 4) return 0; - flags= HA_MRR_NO_NULL_ENDPOINTS; + flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT; if (tab->table->covering_keys.is_set(tab->ref.key)) flags|= HA_MRR_INDEX_ONLY; rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20, + tab->ref.key_parts, &bufsz, &flags, &cost); if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) && (!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) && diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 7c34a5f7595..43c6cd6606a 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -3501,10 +3501,11 @@ static SHOW_VAR status_variables[]= { ***************************************************************************/ int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, + uint n_ranges, uint key_parts, uint mode, HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, + mode, buf); } int ha_maria::multi_range_read_next(char **range_info) @@ -3528,11 +3529,11 @@ ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, - COST_VECT *cost) + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost) { ds_mrr.init(this, table); - return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost); + return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost); } /* MyISAM MRR implementation ends */ diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h 
index c2ff99fab0e..177008f422a 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -174,14 +174,16 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, COST_VECT *cost); + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost); /* Index condition pushdown implementation */ Item *idx_cond_push(uint keyno, Item* idx_cond); diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 0a4229c2ab2..bb6ac446a4f 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -2217,10 +2217,10 @@ static int myisam_init(void *p) ***************************************************************************/ int ha_myisam::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, + uint n_ranges, uint key_parts, uint mode, HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, mode, buf); } int ha_myisam::multi_range_read_next(char **range_info) @@ -2244,11 +2244,11 @@ ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, - COST_VECT *cost) + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost) { ds_mrr.init(this, table); - return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost); + return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost); } /* 
MyISAM MRR implementation ends */ diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 76db0e89536..d37870b861b 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -162,14 +162,16 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, COST_VECT *cost); + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost); /* Index condition pushdown implementation */ Item *idx_cond_push(uint keyno, Item* idx_cond); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index db25b39caab..a8ccb426aa5 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -11025,9 +11025,10 @@ test_innobase_convert_name() */ int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf) + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, mode, buf); } int ha_innobase::multi_range_read_next(char **range_info) @@ -11052,12 +11053,13 @@ ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, return res; } -ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, - uint keys, uint *bufsz, +ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost) { ds_mrr.init(this, table); - 
ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost); + ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, + flags, cost); return res; } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 6c7098560b9..0c1f2b42dd6 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -210,14 +210,16 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); + uint n_ranges, uint key_parts, uint mode, + HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, COST_VECT *cost); + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost); DsMrr_impl ds_mrr; Item *idx_cond_push(uint keyno, Item* idx_cond); From 82f8ed17e1172f949857385a7bc7bebde82a1602 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Mon, 21 Jun 2010 12:34:31 +0400 Subject: [PATCH 2/5] MWL#121: DS-MRR support for clustered primary keys - Add testcases --- sql/multi_range_read.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index e0bccb9bf90..72c85ec11bb 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -674,6 +674,7 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) goto end; } + //TODO: make skip_index_tuple() calls, too? //TODO: skip-record calls here? 
//if (h2->mrr_funcs.skip_record && // h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) From 16e197f5b10fdee23703b94c5549bc17cd81c6f8 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Tue, 22 Jun 2010 21:24:22 +0400 Subject: [PATCH 3/5] MWL#121: DS-MRR support for clustered primary keys - Add testcases - Code cleanup: garbage removal, better comments, make members private where possible --- mysql-test/r/innodb_mrr_cpk.result | 134 ++++++++++++ mysql-test/t/innodb_mrr_cpk.test | 134 ++++++++++++ sql/multi_range_read.cc | 341 +++++++++++++++++------------ sql/multi_range_read.h | 157 +++++++++---- 4 files changed, 577 insertions(+), 189 deletions(-) create mode 100644 mysql-test/r/innodb_mrr_cpk.result create mode 100644 mysql-test/t/innodb_mrr_cpk.test diff --git a/mysql-test/r/innodb_mrr_cpk.result b/mysql-test/r/innodb_mrr_cpk.result new file mode 100644 index 00000000000..f93807e14d8 --- /dev/null +++ b/mysql-test/r/innodb_mrr_cpk.result @@ -0,0 +1,134 @@ +drop table if exists t0,t1,t2,t3; +set @save_join_cache_level=@@join_cache_level; +set join_cache_level=6; +set @save_storage_engine=@@storage_engine; +set storage_engine=innodb; +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a char(8), b char(8), filler char(100), primary key(a)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` char(8) NOT NULL DEFAULT '', + `b` char(8) DEFAULT NULL, + `filler` char(100) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +insert into t1 select +concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), +concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'), +'filler' +from t0 A, t0 B, t0 C; +create table t2 (a char(8)); +insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A'); +This should use join buffer: +explain select * from t1, t2 where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL 
NULL 3 +1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer +This output must be sorted by value of t1.a: +select * from t1, t2 where t1.a=t2.a; +a b filler a +a-1010=A b-1010=B filler a-1010=A +a-1020=A b-1020=B filler a-1020=A +a-1030=A b-1030=B filler a-1030=A +drop table t1, t2; +create table t1( +a char(8) character set utf8, b int, filler char(100), +primary key(a,b) +); +insert into t1 select +concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), +1000 + A.a + B.a*10 + C.a*100, +'filler' +from t0 A, t0 B, t0 C; +create table t2 (a char(8) character set utf8, b int); +insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +a b filler a b +a-1010=A 1010 filler a-1010=A 1010 +a-1020=A 1020 filler a-1020=A 1020 +a-1030=A 1030 filler a-1030=A 1030 +insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 5 +1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +a b filler a b +a-1010=A 1010 filler a-1010=A 1010 +a-1020=A 1020 filler a-1020=A 1020 +a-1020=A 1020 filler a-1020=A 1020 +a-1030=A 1030 filler a-1030=A 1030 +a-1030=A 1030 filler a-1030=A 1030 +drop table t1, t2; +create table t1( +a varchar(8) character set utf8, b int, filler char(100), +primary key(a,b) +); +insert into t1 select +concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), +1000 + A.a + B.a*10 + C.a*100, +'filler' +from t0 A, t0 B, t0 C; +create table t2 (a char(8) character set utf8, b int); +insert into t2 values 
('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +a b filler a b +a-1010=A 1010 filler a-1010=A 1010 +a-1020=A 1020 filler a-1020=A 1020 +a-1030=A 1030 filler a-1030=A 1030 +explain select * from t1, t2 where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer +select * from t1, t2 where t1.a=t2.a; +a b filler a b +a-1010=A 1010 filler a-1010=A 1010 +a-1020=A 1020 filler a-1020=A 1020 +a-1030=A 1030 filler a-1030=A 1030 +drop table t1, t2; +create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c)); +insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C; +insert into t1 values (11, 11, 11, 'filler'); +insert into t1 values (11, 11, 12, 'filler'); +insert into t1 values (11, 11, 13, 'filler'); +insert into t1 values (11, 22, 1234, 'filler'); +insert into t1 values (11, 33, 124, 'filler'); +insert into t1 values (11, 33, 125, 'filler'); +create table t2 (a int, b int); +insert into t2 values (11,33), (11,22), (11,11); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +a b c filler a b +11 11 11 filler 11 11 +11 11 12 filler 11 11 +11 11 13 filler 11 11 +11 22 1234 filler 11 22 +11 33 124 filler 11 33 +11 33 125 filler 11 33 +set join_cache_level=0; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +a b 
c filler a b +11 33 124 filler 11 33 +11 33 125 filler 11 33 +11 22 1234 filler 11 22 +11 11 11 filler 11 11 +11 11 12 filler 11 11 +11 11 13 filler 11 11 +set join_cache_level=6; +drop table t1,t2; +set @@join_cache_level= @save_join_cache_level; +set storage_engine=@save_storage_engine; +drop table t0; diff --git a/mysql-test/t/innodb_mrr_cpk.test b/mysql-test/t/innodb_mrr_cpk.test new file mode 100644 index 00000000000..84b37840880 --- /dev/null +++ b/mysql-test/t/innodb_mrr_cpk.test @@ -0,0 +1,134 @@ +# +# Tests for DS-MRR over clustered primary key. The only engine that supports +# this is InnoDB/XtraDB. +# +# Basic idea about testing +# - DS-MRR/CPK works only with BKA +# - Should also test index condition pushdown +# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering +# - Also test access using prefix of primary key +# +# - Forget about cost model, BKA's multi_range_read_info() call passes 10 for +# #rows, the call is there at all only for applicability check +# +-- source include/have_innodb.inc + +--disable_warnings +drop table if exists t0,t1,t2,t3; +--enable_warnings + +set @save_join_cache_level=@@join_cache_level; +set join_cache_level=6; + +set @save_storage_engine=@@storage_engine; +set storage_engine=innodb; + +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a char(8), b char(8), filler char(100), primary key(a)); +show create table t1; + +insert into t1 select + concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), + concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'), + 'filler' +from t0 A, t0 B, t0 C; + +create table t2 (a char(8)); +insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A'); + +--echo This should use join buffer: +explain select * from t1, t2 where t1.a=t2.a; + +--echo This output must be sorted by value of t1.a: +select * from t1, t2 where t1.a=t2.a; +drop table t1, t2; + +# Try multi-column indexes +create table t1( + a char(8) character set utf8, 
b int, filler char(100), + primary key(a,b) +); + +insert into t1 select + concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), + 1000 + A.a + B.a*10 + C.a*100, + 'filler' +from t0 A, t0 B, t0 C; + +create table t2 (a char(8) character set utf8, b int); +insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; + +# Try with dataset that causes identical lookup keys: +insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; + +drop table t1, t2; + +create table t1( + a varchar(8) character set utf8, b int, filler char(100), + primary key(a,b) +); + +insert into t1 select + concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'), + 1000 + A.a + B.a*10 + C.a*100, + 'filler' +from t0 A, t0 B, t0 C; + +create table t2 (a char(8) character set utf8, b int); +insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020); +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; + +# +# Try scanning on a CPK prefix +# +explain select * from t1, t2 where t1.a=t2.a; +select * from t1, t2 where t1.a=t2.a; +drop table t1, t2; + +# +# The above example is not very interesting, as CPK prefix has +# only one match. 
Create a dataset where scan on CPK prefix +# would produce multiple matches: +# +create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c)); +insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C; + +insert into t1 values (11, 11, 11, 'filler'); +insert into t1 values (11, 11, 12, 'filler'); +insert into t1 values (11, 11, 13, 'filler'); +insert into t1 values (11, 22, 1234, 'filler'); +insert into t1 values (11, 33, 124, 'filler'); +insert into t1 values (11, 33, 125, 'filler'); + +create table t2 (a int, b int); +insert into t2 values (11,33), (11,22), (11,11); + +explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; + +set join_cache_level=0; +select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +set join_cache_level=6; + +drop table t1,t2; + +# +# Check that Index Condition Pushdown (BKA) actually works: +# + +# TODO + +# +# Check that record-check-func is done: +# + +set @@join_cache_level= @save_join_cache_level; +set storage_engine=@save_storage_engine; +drop table t0; + diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 72c85ec11bb..46790adee9e 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -139,8 +139,13 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost) { + /* + Currently we expect this function to be called only in preparation of scan + with HA_MRR_SINGLE_POINT property. + */ + DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT); + *bufsz= 0; /* Default implementation doesn't need a buffer */ - //psergey2-todo: assert for singlepoint ranges here? 
*flags |= HA_MRR_USE_DEFAULT_IMPL; cost->zero(); @@ -323,22 +328,25 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, n_ranges, key_parts, mode, buf); DBUG_RETURN(retval); } - rowids_buf= buf->buffer; + mrr_buf= buf->buffer; is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); if (is_mrr_assoc) status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); - rowids_buf_end= buf->buffer_end; + mrr_buf_end= buf->buffer_end; doing_cpk_scan= check_cpk_scan(h->active_index, mode); if (doing_cpk_scan) { + /* + When doing a scan on CPK, the buffer stores {lookup_tuple, range_id} + pairs + */ uint keylen=0; DBUG_ASSERT(key_parts != 0); - //psergey2-todo: new elem_size here for (uint kp= 0; kp < key_parts; kp++) keylen += table->key_info[h->active_index].key_part[kp].store_length; @@ -350,12 +358,29 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, use_default_impl= FALSE; } else + { + /* In regular DS-MRR, buffer stores {rowid, range_id} pairs */ elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + } - rowids_buf_last= rowids_buf + - ((rowids_buf_end - rowids_buf)/ elem_size)* + mrr_buf_last= mrr_buf + + ((mrr_buf_end - mrr_buf)/ elem_size)* elem_size; - rowids_buf_end= rowids_buf_last; + mrr_buf_end= mrr_buf_last; + + if (doing_cpk_scan) + { + /* + DS-MRR/CPK: fill buffer with lookup tuples and sort; also we don't need a + secondary handler object. + */ + h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); + h->mrr_funcs= *seq_funcs; + dsmrr_fill_buffer_cpk(); + if (dsmrr_eof) + buf->end_of_used_area= mrr_buf_last; + DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */ + } /* There can be two cases: @@ -365,84 +390,68 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, The caller might have called h->index_init(), need to switch h to rnd_pos calls. */ - //psergey2-todo: don't create secondary for CPK scan. 
- if (!doing_cpk_scan) + if (!h2) { - if (!h2) + /* Create a separate handler object to do rndpos() calls. */ + THD *thd= current_thd; + /* + ::clone() takes up a lot of stack, especially on 64 bit platforms. + The constant 5 is an empiric result. + */ + if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) + DBUG_RETURN(1); + DBUG_ASSERT(h->active_index != MAX_KEY); + uint mrr_keyno= h->active_index; + + /* Create a separate handler object to do rndpos() calls. */ + if (!(new_h2= h->clone(thd->mem_root)) || + new_h2->ha_external_lock(thd, F_RDLCK)) { - /* Create a separate handler object to do rndpos() calls. */ - THD *thd= current_thd; - /* - ::clone() takes up a lot of stack, especially on 64 bit platforms. - The constant 5 is an empiric result. - */ - if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) - DBUG_RETURN(1); - DBUG_ASSERT(h->active_index != MAX_KEY); - uint mrr_keyno= h->active_index; - - /* Create a separate handler object to do rndpos() calls. */ - if (!(new_h2= h->clone(thd->mem_root)) || - new_h2->ha_external_lock(thd, F_RDLCK)) - { - delete new_h2; - DBUG_RETURN(1); - } - - if (mrr_keyno == h->pushed_idx_cond_keyno) - pushed_cond= h->pushed_idx_cond; - - /* - Caution: this call will invoke this->dsmrr_close(). Do not put the - created secondary table handler into this->h2 or it will delete it. - */ - if (h->ha_index_end()) - { - h2=new_h2; - goto error; - } - - h2= new_h2; /* Ok, now can put it into h2 */ - table->prepare_for_position(); - h2->extra(HA_EXTRA_KEYREAD); - - if (h2->ha_index_init(mrr_keyno, FALSE)) - goto error; - - use_default_impl= FALSE; - if (pushed_cond) - h2->idx_cond_push(mrr_keyno, pushed_cond); + delete new_h2; + DBUG_RETURN(1); } - else + + if (mrr_keyno == h->pushed_idx_cond_keyno) + pushed_cond= h->pushed_idx_cond; + + /* + Caution: this call will invoke this->dsmrr_close(). Do not put the + created secondary table handler into this->h2 or it will delete it. 
+ */ + if (h->ha_index_end()) { - /* - We get here when the access alternates betwen MRR scan(s) and non-MRR - scans. - - Calling h->index_end() will invoke dsmrr_close() for this object, - which will delete h2. We need to keep it, so save put it away and dont - let it be deleted: - */ - handler *save_h2= h2; - h2= NULL; - int res= (h->inited == handler::INDEX && h->ha_index_end()); - h2= save_h2; - use_default_impl= FALSE; - if (res) - goto error; + h2=new_h2; + goto error; } + + h2= new_h2; /* Ok, now can put it into h2 */ + table->prepare_for_position(); + h2->extra(HA_EXTRA_KEYREAD); + + if (h2->ha_index_init(mrr_keyno, FALSE)) + goto error; + + use_default_impl= FALSE; + if (pushed_cond) + h2->idx_cond_push(mrr_keyno, pushed_cond); } else { - //doing DS-MRR/CPK - // fill-buffer-analog - // eof - h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); - h->mrr_funcs= *seq_funcs; - dsmrr_fill_buffer_cpk(); - if (dsmrr_eof) - buf->end_of_used_area= rowids_buf_last; - DBUG_RETURN(0); // nothing can go wrong while filling the buffer + /* + We get here when the access alternates betwen MRR scan(s) and non-MRR + scans. + + Calling h->index_end() will invoke dsmrr_close() for this object, + which will delete h2. We need to keep it, so save put it away and dont + let it be deleted: + */ + handler *save_h2= h2; + h2= NULL; + int res= (h->inited == handler::INDEX && h->ha_index_end()); + h2= save_h2; + use_default_impl= FALSE; + if (res) + goto error; } if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, @@ -456,7 +465,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, adjust *buf to indicate that the remaining buffer space will not be used. */ if (dsmrr_eof) - buf->end_of_used_area= rowids_buf_last; + buf->end_of_used_area= mrr_buf_last; /* h->inited == INDEX may occur when 'range checked for each record' is @@ -512,6 +521,9 @@ static int rowid_cmp(void *h, uchar *a, uchar *b) rowid and return. 
The function assumes that rowids buffer is empty when it is invoked. + + dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're + scanning. @param h Table handler @@ -526,8 +538,8 @@ int DsMrr_impl::dsmrr_fill_buffer() int res; DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); - rowids_buf_cur= rowids_buf; - while ((rowids_buf_cur < rowids_buf_end) && + mrr_buf_cur= mrr_buf; + while ((mrr_buf_cur < mrr_buf_end) && !(res= h2->handler::multi_range_read_next(&range_info))) { KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; @@ -537,13 +549,13 @@ int DsMrr_impl::dsmrr_fill_buffer() /* Put rowid, or {rowid, range_id} pair into the buffer */ h2->position(table->record[0]); - memcpy(rowids_buf_cur, h2->ref, h2->ref_length); - rowids_buf_cur += h2->ref_length; + memcpy(mrr_buf_cur, h2->ref, h2->ref_length); + mrr_buf_cur += h2->ref_length; if (is_mrr_assoc) { - memcpy(rowids_buf_cur, &range_info, sizeof(void*)); - rowids_buf_cur += sizeof(void*); + memcpy(mrr_buf_cur, &range_info, sizeof(void*)); + mrr_buf_cur += sizeof(void*); } } @@ -553,27 +565,29 @@ int DsMrr_impl::dsmrr_fill_buffer() /* Sort the buffer contents by rowid */ uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; + uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size; - my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, + my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, (void*)h); - rowids_buf_last= rowids_buf_cur; - rowids_buf_cur= rowids_buf; + mrr_buf_last= mrr_buf_cur; + mrr_buf_cur= mrr_buf; DBUG_RETURN(0); } -/* qsort-compatible function to compare key tuples */ +/* + my_qsort2-compatible function to compare key tuples +*/ + int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) { DsMrr_impl *dsmrr= (DsMrr_impl*)arg; TABLE *table= dsmrr->h->table; KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part; - KEY_PART_INFO *part_end= part + 
dsmrr->cpk_n_parts; + uchar *key1_end= key1 + dsmrr->cpk_tuple_length; - //uint32 *lengths=item->field_lengths; - for (; part < part_end; ++part) + while (key1 < key1_end) { Field* f = part->field; int len = part->store_length; @@ -582,33 +596,43 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) return res; key1 += len; key2 += len; + part++; } return 0; } -//psergey2: -int DsMrr_impl::dsmrr_fill_buffer_cpk() +/* + DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort + + DESCRIPTION + DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort + + dsmrr_eof is set to indicate whether we've exhausted the list of ranges + we're scanning. +*/ + +void DsMrr_impl::dsmrr_fill_buffer_cpk() { int res; KEY_MULTI_RANGE cur_range; DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk"); - rowids_buf_cur= rowids_buf; - while ((rowids_buf_cur < rowids_buf_end) && + mrr_buf_cur= mrr_buf; + while ((mrr_buf_cur < mrr_buf_end) && !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range))) { DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length); /* Put key, or {key, range_id} pair into the buffer */ - memcpy(rowids_buf_cur, cur_range.start_key.key, cpk_tuple_length); - rowids_buf_cur += cpk_tuple_length; + memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length); + mrr_buf_cur += cpk_tuple_length; if (is_mrr_assoc) { - memcpy(rowids_buf_cur, &cur_range.ptr, sizeof(void*)); - rowids_buf_cur += sizeof(void*); + memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*)); + mrr_buf_cur += sizeof(void*); } } @@ -616,77 +640,82 @@ int DsMrr_impl::dsmrr_fill_buffer_cpk() /* Sort the buffer contents by rowid */ uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*); - uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; + uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size; - my_qsort2(rowids_buf, n_rowids, elem_size, + my_qsort2(mrr_buf, n_rowids, elem_size, 
(qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this); - rowids_buf_last= rowids_buf_cur; - rowids_buf_cur= rowids_buf; - DBUG_RETURN(0); + mrr_buf_last= mrr_buf_cur; + mrr_buf_cur= mrr_buf; + DBUG_VOID_RETURN; } /* - CPK: so, the source is - - buffer exhaustion/re-fill - - advance to next range on "record-not-found" error. - - if scanning on a prefix, enumerate all records for a key. + DS-MRR/CPK: multi_range_read_next() function + + DESCRIPTION + DsMrr_impl::dsmrr_next_cpk() + + DESCRIPTION + DS-MRR/CPK: multi_range_read_next() function. + This is similar to DsMrr_impl::dsmrr_next(), the differences are that + - we get records with index_read(), not with rnd_pos() + - we may get multiple records for one key (=element of the buffer) + - unlike dsmrr_fill_buffer(), dsmrr_fill_buffer_cpk() never fails. + + RETURN + 0 OK, next record was successfully read + HA_ERR_END_OF_FILE End of records + Other Some other error */ + int DsMrr_impl::dsmrr_next_cpk(char **range_info) { int res; if (cpk_have_range) { - res= h->index_next_same(table->record[0], rowids_buf_cur, cpk_tuple_length); + res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length); if (res != HA_ERR_END_OF_FILE) { - // todo if (is_mrr_assoc) memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); return res; } - /* - Ok, we got EOF for records in this range. Fall through to get to another - range. - */ + /* No more records in this range. Fall through to get to another range */ } do { - /* First, make sure we have a range at start of the buffer*/ - if (rowids_buf_cur == rowids_buf_last) + /* First, make sure we have a range at start of the buffer */ + if (mrr_buf_cur == mrr_buf_last) { if (dsmrr_eof) { res= HA_ERR_END_OF_FILE; goto end; } - // TODO: the return values are mix of HA_ERR_ codes and TRUE as "generic - // failure" error. Is this ok? 
- if ((res= dsmrr_fill_buffer_cpk())) - goto end; + dsmrr_fill_buffer_cpk(); } - - if (rowids_buf_cur == rowids_buf_last) + if (mrr_buf_cur == mrr_buf_last) { res= HA_ERR_END_OF_FILE; goto end; } - //TODO: make skip_index_tuple() calls, too? - //TODO: skip-record calls here? + //psergey2-todo: make skip_index_tuple() calls, too? + //psergey2-todo: skip-record calls here? //if (h2->mrr_funcs.skip_record && // h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) // continue; /* Ok, got the range. Try making a lookup. */ - uchar *lookup_tuple= rowids_buf_cur; - rowids_buf_cur += cpk_tuple_length; + uchar *lookup_tuple= mrr_buf_cur; + mrr_buf_cur += cpk_tuple_length; if (is_mrr_assoc) { - memcpy(cpk_saved_range_info, rowids_buf_cur, sizeof(void*)); - rowids_buf_cur += sizeof(void*) * test(is_mrr_assoc); + memcpy(cpk_saved_range_info, mrr_buf_cur, sizeof(void*)); + mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc); } res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, @@ -698,6 +727,10 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) if (!res) { memcpy(range_info, cpk_saved_range_info, sizeof(void*)); + /* + Attempt reading more rows from this range only if there actually can + be multiple matches: + */ cpk_have_range= !cpk_is_unique_scan; break; } @@ -707,6 +740,7 @@ end: return res; } + /** DS-MRR implementation: multi_range_read_next() function */ @@ -725,7 +759,7 @@ int DsMrr_impl::dsmrr_next(char **range_info) do { - if (rowids_buf_cur == rowids_buf_last) + if (mrr_buf_cur == mrr_buf_last) { if (dsmrr_eof) { @@ -738,17 +772,17 @@ int DsMrr_impl::dsmrr_next(char **range_info) } /* return eof if there are no rowids in the buffer after re-fill attempt */ - if (rowids_buf_cur == rowids_buf_last) + if (mrr_buf_cur == mrr_buf_last) { res= HA_ERR_END_OF_FILE; goto end; } - rowid= rowids_buf_cur; + rowid= mrr_buf_cur; if (is_mrr_assoc) - memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); + 
memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**)); - rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); + mrr_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); if (h2->mrr_funcs.skip_record && h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) continue; @@ -870,7 +904,33 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) return FALSE; } -/** + +/* + Check if key/flags allow DS-MRR/CPK strategy to be used + + SYNOPSIS + DsMrr_impl::check_cpk_scan() + keyno Index that will be used + mrr_flags + + DESCRIPTION + Check if key/flags allow DS-MRR/CPK strategy to be used. + + RETURN + TRUE DS-MRR/CPK should be used + FALSE Otherwise +*/ + +bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags) +{ + return test((mrr_flags & HA_MRR_SINGLE_POINT) && + !(mrr_flags & HA_MRR_SORTED) && + keyno == table->s->primary_key && + h->primary_key_is_clustered()); +} + + +/* DS-MRR Internals: Choose between Default MRR implementation and DS-MRR Make the choice between using Default MRR implementation and DS-MRR. @@ -892,13 +952,7 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) @retval TRUE Default MRR implementation should be used @retval FALSE DS-MRR implementation should be used */ -bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags) -{ - return test((mrr_flags & HA_MRR_SINGLE_POINT) && - !(mrr_flags & HA_MRR_SORTED) && - keyno == table->s->primary_key && - h->primary_key_is_clustered()); -} + bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, COST_VECT *cost) @@ -906,9 +960,8 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, COST_VECT dsmrr_cost; bool res; THD *thd= current_thd; - //psergey2: check the criteria. 
- doing_cpk_scan= check_cpk_scan(keyno, *flags); + doing_cpk_scan= check_cpk_scan(keyno, *flags); if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || (keyno == table->s->primary_key && h->primary_key_is_clustered() && !doing_cpk_scan) || diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h index b379d4f517d..5dd2e0d6adf 100644 --- a/sql/multi_range_read.h +++ b/sql/multi_range_read.h @@ -1,16 +1,76 @@ /* - This file contains declarations for - - Disk-Sweep MultiRangeRead (DS-MRR) implementation + This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR) + implementation */ /** - A Disk-Sweep MRR interface implementation + A Disk-Sweep implementation of MRR Interface (DS-MRR for short) - This implementation makes range (and, in the future, 'ref') scans to read - table rows in disk sweeps. - - Currently it is used by MyISAM and InnoDB. Potentially it can be used with - any table handler that has non-clustered indexes and on-disk rows. + This is a "plugin"(*) for storage engines that allows make index scans + read table rows in rowid order. For disk-based storage engines, this is + faster than reading table rows in whatever-SQL-layer-makes-calls-in order. + + (*) - only conceptually. No dynamic loading or binary compatibility of any + kind. + + General scheme of things: + + SQL Layer code + | | | + -v---v---v---- handler->multi_range_read_XXX() function calls + | | | + ____________________________________ + / DS-MRR module \ + | (scan indexes, order rowids, do | + | full record reads in rowid order) | + \____________________________________/ + | | | + -|---|---|----- handler->read_range_first()/read_range_next(), + | | | handler->index_read(), handler->rnd_pos() calls. + | | | + v v v + Storage engine internals + + Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines. 
+ Potentially it can be used with any table handler that has disk-based data + storage and has better performance when reading data in rowid order. +*/ + + +/* + DS-MRR implementation for one table. Create/use one object of this class for + each ha_{myisam/innobase/etc} object. That object will be further referred to + as "the handler" + + There are actually three strategies + S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to + MRR-to-non-MRR calls converter) + S2. Regular DS-MRR + S3. DS-MRR/CPK for doing scans on clustered primary keys. + + S1 is used for cases which DS-MRR is unable to handle for some reason. + + S2 is the actual DS-MRR. The basic algorithm is as follows: + 1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and + fill the buffer with {rowid, range_id} pairs + 2. Sort the buffer by rowid + 3. for each {rowid, range_id} pair in the buffer + get record by rowid and return the {record, range_id} pair + 4. Repeat the above steps until we've exhausted the list of ranges we're + scanning. + + S3 is the variant of DS-MRR for use with clustered primary keys (or any + clustered index). The idea is that in a clustered index it is sufficient to + access the index in index order, and we don't need an intermediate step to + get rowid (like step #1 in S2). + + DS-MRR/CPK's basic algorithm is as follows: + 1. Collect a number of ranges (=lookup keys) + 2. Sort them so that they follow in index order. + 3. for each {lookup_key, range_id} pair in the buffer + get record(s) matching the lookup key and return {record, range_id} pairs + 4. Repeat the above steps until we've exhausted the list of ranges we're + scanning. */ class DsMrr_impl @@ -21,40 +81,6 @@ public: DsMrr_impl() : h2(NULL) {}; - /* - The "owner" handler object (the one that calls dsmrr_XXX functions. - It is used to retrieve full table rows by calling rnd_pos(). 
- */ - handler *h; - TABLE *table; /* Always equal to h->table */ -private: - /* Secondary handler object. It is used for scanning the index */ - handler *h2; - - /* Buffer to store rowids, or (rowid, range_id) pairs */ - uchar *rowids_buf; - uchar *rowids_buf_cur; /* Current position when reading/writing */ - uchar *rowids_buf_last; /* When reading: end of used buffer space */ - uchar *rowids_buf_end; /* End of the buffer */ - - bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ - - /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */ - bool is_mrr_assoc; - - bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ - - bool doing_cpk_scan; - uint cpk_tuple_length; - uint cpk_n_parts; - bool cpk_is_unique_scan; - char *cpk_saved_range_info; - bool cpk_have_range; - - - bool check_cpk_scan(uint keyno, uint mrr_flags); - static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); -public: void init(handler *h_arg, TABLE *table_arg) { h= h_arg; @@ -64,10 +90,7 @@ public: uint n_ranges, uint key_parts, uint mode, HANDLER_BUFFER *buf); void dsmrr_close(); - int dsmrr_fill_buffer(); - int dsmrr_fill_buffer_cpk(); int dsmrr_next(char **range_info); - int dsmrr_next_cpk(char **range_info); ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost); @@ -76,9 +99,53 @@ public: void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); private: + /* + The "owner" handler object (the one that calls dsmrr_XXX functions. + It is used to retrieve full table rows by calling rnd_pos(). + */ + handler *h; + TABLE *table; /* Always equal to h->table */ + + /* Secondary handler object. 
It is used for scanning the index */ + handler *h2; + + /* Buffer to store rowids, or (rowid, range_id) pairs */ + uchar *mrr_buf; + uchar *mrr_buf_cur; /* Current position when reading/writing */ + uchar *mrr_buf_last; /* When reading: end of used buffer space */ + uchar *mrr_buf_end; /* End of the buffer */ + + bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ + + /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */ + bool is_mrr_assoc; + + bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ + + bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */ + + /** DS-MRR/CPK variables start */ + + /* Length of lookup tuple being used, in bytes */ + uint cpk_tuple_length; + /* + TRUE <=> We're scanning on a full primary key (and not on prefix), and so + can get max. one match for each key + */ + bool cpk_is_unique_scan; + /* TRUE<=> we're in a middle of enumerating records from a range */ + bool cpk_have_range; + /* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */ + char *cpk_saved_range_info; + bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, COST_VECT *cost); bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, uint *buffer_size, COST_VECT *cost); + bool check_cpk_scan(uint keyno, uint mrr_flags); + static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); + int dsmrr_fill_buffer(); + void dsmrr_fill_buffer_cpk(); + int dsmrr_next_cpk(char **range_info); }; From b45748f058e00b941d7b737cc43dcc3ad237d5d3 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Tue, 22 Jun 2010 22:38:52 +0400 Subject: [PATCH 4/5] MWL#121: DS-MRR support for clustered primary keys - Fix the code to work with IndexConditionPushdown+BKA (EXPLAIN is still incorrect, see comments in the patch) - Test coverage for ICP+BKA --- mysql-test/r/innodb_mrr_cpk.result | 14 ++++++++ mysql-test/t/innodb_mrr_cpk.test | 13 +++++--- sql/multi_range_read.cc | 52 
+++++++++++++++++++++++------- 3 files changed, 62 insertions(+), 17 deletions(-) diff --git a/mysql-test/r/innodb_mrr_cpk.result b/mysql-test/r/innodb_mrr_cpk.result index f93807e14d8..469d78e8e45 100644 --- a/mysql-test/r/innodb_mrr_cpk.result +++ b/mysql-test/r/innodb_mrr_cpk.result @@ -128,6 +128,20 @@ a b c filler a b 11 11 12 filler 11 11 11 11 13 filler 11 11 set join_cache_level=6; +explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using index condition(BKA); Using join buffer +select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +a b c filler a b +set optimizer_switch='index_condition_pushdown=off'; +explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 +1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using where; Using join buffer +select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +a b c filler a b +set optimizer_switch='index_condition_pushdown=on'; drop table t1,t2; set @@join_cache_level= @save_join_cache_level; set storage_engine=@save_storage_engine; diff --git a/mysql-test/t/innodb_mrr_cpk.test b/mysql-test/t/innodb_mrr_cpk.test index 84b37840880..69eeef9618f 100644 --- a/mysql-test/t/innodb_mrr_cpk.test +++ b/mysql-test/t/innodb_mrr_cpk.test @@ -112,21 +112,24 @@ insert into t2 values (11,33), (11,22), (11,11); explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; +# Check a real resultset for comparison: set join_cache_level=0; select * from t1, t2 where t1.a=t2.a and t1.b=t2.b; set join_cache_level=6; -drop table t1,t2; # # Check that Index Condition Pushdown (BKA) actually works: # +explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +select * from t1, t2 where t1.a=t2.a 
and t2.b + t1.b > 100; -# TODO +set optimizer_switch='index_condition_pushdown=off'; +explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100; +set optimizer_switch='index_condition_pushdown=on'; -# -# Check that record-check-func is done: -# +drop table t1,t2; set @@join_cache_level= @save_join_cache_level; set storage_engine=@save_storage_engine; diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 46790adee9e..9c0a0233e0e 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -624,7 +624,6 @@ void DsMrr_impl::dsmrr_fill_buffer_cpk() { DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length); - /* Put key, or {key, range_id} pair into the buffer */ memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length); mrr_buf_cur += cpk_tuple_length; @@ -654,7 +653,8 @@ void DsMrr_impl::dsmrr_fill_buffer_cpk() DS-MRR/CPK: multi_range_read_next() function DESCRIPTION - DsMrr_impl::dsmrr_next_cpk() + DsMrr_impl::dsmrr_next_cpk() + range_info OUT identifier of range that the returned record belongs to DESCRIPTION DS-MRR/CPK: multi_range_read_next() function. @@ -673,16 +673,31 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) { int res; - if (cpk_have_range) + while (cpk_have_range) { + + if (h->mrr_funcs.skip_record && + h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL)) + { + cpk_have_range= FALSE; + break; + } + res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length); + + if (h->mrr_funcs.skip_index_tuple && + h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info)) + continue; + if (res != HA_ERR_END_OF_FILE) { if (is_mrr_assoc) memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); return res; } - /* No more records in this range. Fall through to get to another range */ + + /* No more records in this range. 
Exit this loop and go get another range */ + cpk_have_range= FALSE; } do @@ -703,30 +718,43 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) goto end; } - //psergey2-todo: make skip_index_tuple() calls, too? - //psergey2-todo: skip-record calls here? - //if (h2->mrr_funcs.skip_record && - // h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) - // continue; - /* Ok, got the range. Try making a lookup. */ uchar *lookup_tuple= mrr_buf_cur; mrr_buf_cur += cpk_tuple_length; if (is_mrr_assoc) { - memcpy(cpk_saved_range_info, mrr_buf_cur, sizeof(void*)); + memcpy(&cpk_saved_range_info, mrr_buf_cur, sizeof(void*)); mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc); } + if (h->mrr_funcs.skip_record && + h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL)) + continue; + res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, HA_READ_KEY_EXACT); + /* + Check pushed index condition. Performance-wise, it does not make any + sense to put this call here (the above call has already accessed the full + record). That's the best I could do, though, because: + - ha_innobase doesn't support IndexConditionPushdown on clustered PK + - MRR interface doesn't allow the storage engine to refuse a pushed index + condition. + Having this call here is not fully harmless: EXPLAIN shows "pushed index + condition", which is technically true but doesn't bring the benefits that + one might expect. 
+ */ + if (h->mrr_funcs.skip_index_tuple && + h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info)) + continue; + if (res && res != HA_ERR_END_OF_FILE) goto end; if (!res) { - memcpy(range_info, cpk_saved_range_info, sizeof(void*)); + memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); /* Attempt reading more rows from this range only if there actually can be multiple matches: From 488d352a662438a3c34fd1b41261a94be7572f38 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Tue, 22 Jun 2010 23:26:11 +0400 Subject: [PATCH 5/5] MWL#121: DS-MRR support for clustered primary keys - Remove back key_parts from multi_range_read_init() parameters - Related code simplification/cleanup --- sql/handler.h | 8 +-- sql/multi_range_read.cc | 75 ++++++++++++----------------- sql/multi_range_read.h | 3 +- sql/opt_range.cc | 1 - sql/sql_join_cache.cc | 3 +- storage/maria/ha_maria.cc | 7 ++- storage/maria/ha_maria.h | 3 +- storage/myisam/ha_myisam.cc | 4 +- storage/myisam/ha_myisam.h | 3 +- storage/xtradb/handler/ha_innodb.cc | 4 +- storage/xtradb/handler/ha_innodb.h | 3 +- 11 files changed, 47 insertions(+), 67 deletions(-) diff --git a/sql/handler.h b/sql/handler.h index f2cc50de38a..3e173905f66 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1168,9 +1168,9 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, COST_VECT *cost); /* - The below two are not used (and not handled) in this milestone of this WL - entry because there seems to be no use for them at this stage of - implementation. + Indicates that all scanned ranges will be singlepoint (aka equality) ranges. + The ranges may not use the full key but all of them will use the same number + of key parts. 
*/ #define HA_MRR_SINGLE_POINT 1 #define HA_MRR_FIXED_KEY 2 @@ -1755,7 +1755,7 @@ public: uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost); virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, + uint n_ranges, uint mode, HANDLER_BUFFER *buf); virtual int multi_range_read_next(char **range_info); virtual int read_range_first(const key_range *start_key, diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 9c0a0233e0e..c86143c4a12 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -1,4 +1,5 @@ #include "mysql_priv.h" +#include #include "sql_select.h" /**************************************************************************** @@ -203,8 +204,7 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, int handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf) + uint n_ranges, uint mode, HANDLER_BUFFER *buf) { DBUG_ENTER("handler::multi_range_read_init"); mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); @@ -306,8 +306,7 @@ scan_it_again: */ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, - void *seq_init_param, uint n_ranges, uint key_parts, - uint mode, + void *seq_init_param, uint n_ranges, uint mode, HANDLER_BUFFER *buf) { uint elem_size; @@ -324,8 +323,8 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, { use_default_impl= TRUE; const int retval= - h->handler::multi_range_read_init(seq_funcs, seq_init_param, - n_ranges, key_parts, mode, buf); + h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, + mode, buf); DBUG_RETURN(retval); } mrr_buf= buf->buffer; @@ -337,51 +336,25 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, mrr_buf_end= buf->buffer_end; - - doing_cpk_scan= check_cpk_scan(h->active_index, mode); - if (doing_cpk_scan) + if ((doing_cpk_scan= 
check_cpk_scan(h->active_index, mode))) { - /* - When doing a scan on CPK, the buffer stores {lookup_tuple, range_id} - pairs - */ - uint keylen=0; - DBUG_ASSERT(key_parts != 0); - for (uint kp= 0; kp < key_parts; kp++) - keylen += table->key_info[h->active_index].key_part[kp].store_length; - - cpk_tuple_length= keylen; - cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts == - key_parts); + /* It's a DS-MRR/CPK scan */ + cpk_tuple_length= 0; /* dummy value telling it needs to be inited */ cpk_have_range= FALSE; - elem_size= keylen + (int)is_mrr_assoc * sizeof(void*); use_default_impl= FALSE; - } - else - { - /* In regular DS-MRR, buffer stores {rowid, range_id} pairs */ - elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - } - - mrr_buf_last= mrr_buf + - ((mrr_buf_end - mrr_buf)/ elem_size)* - elem_size; - mrr_buf_end= mrr_buf_last; - - if (doing_cpk_scan) - { - /* - DS-MRR/CPK: fill buffer with lookup tuples and sort; also we don't need a - secondary handler object. 
- */ h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); h->mrr_funcs= *seq_funcs; dsmrr_fill_buffer_cpk(); - if (dsmrr_eof) + if (dsmrr_eof) buf->end_of_used_area= mrr_buf_last; DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */ } + /* In regular DS-MRR, buffer stores {rowid, range_id} pairs */ + elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/ elem_size)* elem_size; + mrr_buf_end= mrr_buf_last; + /* There can be two cases: - This is the first call since index_init(), h2==NULL @@ -454,8 +427,8 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, goto error; } - if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, - key_parts, mode, buf) || + if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, + mode, buf) || dsmrr_fill_buffer()) { goto error; @@ -604,6 +577,9 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) /* DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort + + SYNOPSIS + DsMrr_impl::dsmrr_fill_buffer_cpk() DESCRIPTION DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort @@ -623,7 +599,18 @@ void DsMrr_impl::dsmrr_fill_buffer_cpk() !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range))) { DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); - DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length); + DBUG_ASSERT(!cpk_tuple_length || + cpk_tuple_length == cur_range.start_key.length); + if (!cpk_tuple_length) + { + cpk_tuple_length= cur_range.start_key.length; + cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts == + my_count_bits(cur_range.start_key.keypart_map)); + uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*); + mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/elem_size) * elem_size; + mrr_buf_end= mrr_buf_last; + } + /* Put key, or {key, range_id} pair into the buffer */ memcpy(mrr_buf_cur, 
cur_range.start_key.key, cpk_tuple_length); mrr_buf_cur += cpk_tuple_length; diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h index 5dd2e0d6adf..7a5e57e490e 100644 --- a/sql/multi_range_read.h +++ b/sql/multi_range_read.h @@ -87,8 +87,7 @@ public: table= table_arg; } int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf); + uint n_ranges, uint mode, HANDLER_BUFFER *buf); void dsmrr_close(); int dsmrr_next(char **range_info); diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 25c4259295f..ad0f9301b7f 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8368,7 +8368,6 @@ int QUICK_RANGE_SELECT::reset() RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0}; error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements, - uint(-1), mrr_flags, mrr_buf_desc? mrr_buf_desc: &empty_buf); DBUG_RETURN(error); diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 120b109d8ff..c536026214c 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -2376,8 +2376,7 @@ JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges) */ if (!file->inited) file->ha_index_init(join_tab->ref.key, 1); - if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges, - join_tab->ref.key_parts, + if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges, mrr_mode, &mrr_buff))) rc= error < 0 ? 
NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 43c6cd6606a..e27983989d8 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -3501,11 +3501,10 @@ static SHOW_VAR status_variables[]= { ***************************************************************************/ int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf) + uint n_ranges, uint mode, + HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, - mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); } int ha_maria::multi_range_read_next(char **range_info) diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 177008f422a..6901229bb44 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -174,8 +174,7 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf); + uint n_ranges, uint mode, HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index bb6ac446a4f..95ab5cb167e 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -2217,10 +2217,10 @@ static int myisam_init(void *p) ***************************************************************************/ int ha_myisam::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, + uint n_ranges, uint mode, HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); } int 
ha_myisam::multi_range_read_next(char **range_info) diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index d37870b861b..f5428e653c4 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -162,8 +162,7 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf); + uint n_ranges, uint mode, HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index a8ccb426aa5..8aff0103e20 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -11025,10 +11025,10 @@ test_innobase_convert_name() */ int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, + uint n_ranges, uint mode, HANDLER_BUFFER *buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, key_parts, mode, buf); + return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); } int ha_innobase::multi_range_read_next(char **range_info) diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 0c1f2b42dd6..41a073e4374 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -210,8 +210,7 @@ public: * Multi Range Read interface */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint key_parts, uint mode, - HANDLER_BUFFER *buf); + uint n_ranges, uint mode, HANDLER_BUFFER *buf); int multi_range_read_next(char **range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param,