1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

Original idea from Zardosht Kasheff to add HA_CLUSTERED_INDEX

- Added a lot of code comments
- Updated get_best_ror_intersec() to prefer index scan on not clustered keys before clustered keys.
- Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY
- For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered()
- Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect.

sql/ha_partition.h:
  Added comment with warning for code unsafe to use with multiple storage engines at the same time
sql/handler.h:
  Added HA_CLUSTERED_INDEX.
  Documented primary_key_is_clustered()
sql/opt_range.cc:
  Added code comments
  Updated get_best_ror_intersec() to ignore clustered keys.
  Optimized away cpk_scan_used and one instance of current_thd (Simpler code)
  Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY
sql/sql_select.cc:
  Changed comment to #ifdef
  For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered()
  (Change is smaller than what it looks beause of indentation change)
sql/sql_table.cc:
  Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect.
storage/innobase/handler/ha_innodb.h:
  Added support for HA_CLUSTERED_INDEX
storage/innodb_plugin/handler/ha_innodb.cc:
  Added support for HA_CLUSTERED_INDEX
storage/xtradb/handler/ha_innodb.cc:
  Added support for HA_CLUSTERED_INDEX
This commit is contained in:
Michael Widenius
2011-05-18 19:26:30 +03:00
parent 5c70f813f3
commit 3631146442
8 changed files with 123 additions and 63 deletions

View File

@@ -877,6 +877,10 @@ public:
*/
virtual ulong index_flags(uint inx, uint part, bool all_parts) const
{
/*
The following code is not safe if you are using different
storage engines or different index types per partition.
*/
return m_file[0]->index_flags(inx, part, all_parts);
}

View File

@@ -161,8 +161,11 @@
*/
#define HA_KEY_SCAN_NOT_ROR 128
#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
/*
Data is clustered on this key. This means that when you read the key
you also get the row data in the same block.
*/
#define HA_CLUSTERED_INDEX 512
/*
bits in alter_table_flags:
@@ -2311,9 +2314,22 @@ public:
/*
@retval TRUE Primary key (if there is one) is clustered
key covering all fields
@retval FALSE otherwise
Check if the primary key (if there is one) is a clustered key covering
all fields. This means:
- Data is stored together with the primary key (no secondary lookup
needed to find the row data). The optimizer uses this to find out
the cost of fetching data.
- The primary key is part of each secondary key and is used
to find the row data in the primary index when reading trough
secondary indexes.
- When doing a HA_KEYREAD_ONLY we get also all the primary key parts
into the row. This is critical property used by index_merge.
For a clustered primary key, index_flags() returns also HA_CLUSTERED_INDEX
@retval TRUE yes
@retval FALSE No.
*/
virtual bool primary_key_is_clustered() { return FALSE; }
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)

View File

@@ -1816,6 +1816,12 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
DBUG_VOID_RETURN;
}
/*
QUICK_INDEX_SORT_SELECT works as follows:
- Do index scans, accumulate rowids in the Unique object
(Unique will also sort and de-duplicate rowids)
- Use rowids from unique to run a disk-ordered sweep
*/
QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
TABLE *table)
@@ -1848,7 +1854,18 @@ QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
if (head->file->primary_key_is_clustered() &&
quick_sel_range->index == head->s->primary_key)
{
/* A quick_select over a clustered primary key is handled specifically */
/*
A quick_select over a clustered primary key is handled specifically
Here we assume:
- PK columns are included in any other merged index
- Scan on the PK is disk-ordered.
(not meeting #2 will only cause performance degradation)
We could treat clustered PK as any other index, but that would
be inefficient. There is no point in doing scan on
CPK, remembering the rowid, then making rnd_pos() call with
that rowid.
*/
pk_quick_select= quick_sel_range;
DBUG_RETURN(0);
}
@@ -4298,11 +4315,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
DBUG_ENTER("get_sweep_read_cost");
if (param->table->file->primary_key_is_clustered())
{
/*
We are using the primary key to find the rows.
Calculate the cost for this.
*/
result= param->table->file->read_time(param->table->s->primary_key,
(uint)records, records);
}
else
{
/*
Rows will be retreived with rnd_pos(). Caluclate the expected
cost for this.
*/
double n_blocks=
ceil(ulonglong2double(param->table->file->stats.data_file_length) /
IO_SIZE);
@@ -6187,7 +6212,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
ROR_SCAN_INFO **cur_ror_scan;
ROR_SCAN_INFO *cpk_scan= NULL;
uint cpk_no;
bool cpk_scan_used= FALSE;
if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
sizeof(ROR_SCAN_INFO*)*
@@ -6199,11 +6223,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
{
ROR_SCAN_INFO *scan;
uint key_no;
if (!tree->ror_scans_map.is_set(idx))
continue;
key_no= param->real_keynr[idx];
if (key_no != cpk_no &&
param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
{
/* Ignore clustering keys */
tree->n_ror_scans--;
continue;
}
if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
return NULL;
if (param->real_keynr[idx] == cpk_no)
if (key_no == cpk_no)
{
cpk_scan= scan;
tree->n_ror_scans--;
@@ -6289,15 +6322,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
{
if (ror_intersect_add(intersect, cpk_scan, TRUE) &&
(intersect->total_cost < min_cost))
{
cpk_scan_used= TRUE;
intersect_best= intersect; //just set pointer here
}
}
else
cpk_scan= 0; // Don't use cpk_scan
/* Ok, return ROR-intersect plan if we have found one */
TRP_ROR_INTERSECT *trp= NULL;
if (min_cost < read_time && (cpk_scan_used || best_num > 1))
if (min_cost < read_time && (cpk_scan || best_num > 1))
{
if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
DBUG_RETURN(trp);
@@ -6316,7 +6348,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
set_if_smaller(param->table->quick_condition_rows, best_rows);
trp->records= best_rows;
trp->index_scan_costs= intersect_best->index_scan_costs;
trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
trp->cpk_scan= cpk_scan;
DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
"cost %g, records %lu",
trp->read_cost, (ulong) trp->records));
@@ -9511,10 +9543,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
bool pk_is_clustered= file->primary_key_is_clustered();
if (index_only &&
(file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
!(pk_is_clustered && keynr == param->table->s->primary_key))
!(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
*mrr_flags |= HA_MRR_INDEX_ONLY;
if (current_thd->lex->sql_command != SQLCOM_SELECT)
if (param->thd->lex->sql_command != SQLCOM_SELECT)
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
*bufsize= param->thd->variables.mrr_buff_size;

View File

@@ -8550,17 +8550,19 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
else if (!table->covering_keys.is_clear_all() &&
!(tab->select && tab->select->quick))
{ // Only read index tree
#ifdef BAD_OPTIMIZATION
/*
It has turned out that the below change, while speeding things
up for disk-bound loads, slows them down for cases when the data
is in disk cache (see BUG#35850):
// See bug #26447: "Using the clustered index for a table scan
// is always faster than using a secondary index".
See bug #26447: "Using the clustered index for a table scan
is always faster than using a secondary index".
*/
if (table->s->primary_key != MAX_KEY &&
table->file->primary_key_is_clustered())
tab->index= table->s->primary_key;
else
*/
#endif
tab->index=find_shortest_key(table, & table->covering_keys);
tab->read_first_record= join_read_first;
/* Read with index_first / index_next */
@@ -16525,9 +16527,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
*/
DBUG_ASSERT (ref_key != (int) nr);
bool is_covering= table->covering_keys.is_set(nr) ||
(nr == table->s->primary_key &&
table->file->primary_key_is_clustered());
bool is_covering= (table->covering_keys.is_set(nr) ||
(table->file->index_flags(nr, 0, 1) &
HA_CLUSTERED_INDEX));
/*
Don't use an index scan with ORDER BY without limit.
@@ -16680,17 +16682,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
/*
filesort() and join cache are usually faster than reading in
index order and not using join cache, except in case that chosen
index is clustered primary key.
index is clustered key.
*/
if ((select_limit >= table_records) &&
if (best_key < 0 ||
((select_limit >= table_records) &&
(tab->type == JT_ALL &&
tab->join->tables > tab->join->const_tables + 1) &&
((unsigned) best_key != table->s->primary_key ||
!table->file->primary_key_is_clustered()))
!(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
goto use_filesort;
if (best_key >= 0)
{
if (table->quick_keys.is_set(best_key) && best_key != ref_key)
{
key_map map;
@@ -16713,9 +16713,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
used_key_parts= (order_direction == -1) ?
saved_best_key_parts : best_key_parts;
}
else
goto use_filesort;
}
check_reverse_order:
DBUG_ASSERT(order_direction != 0);

View File

@@ -7983,7 +7983,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
if (order)
{
if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
if (to->s->primary_key != MAX_KEY &&
to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
{
char warn_buff[MYSQL_ERRMSG_SIZE];
my_snprintf(warn_buff, sizeof(warn_buff),

View File

@@ -98,10 +98,14 @@ class ha_innobase: public handler
Table_flags table_flags() const;
ulong index_flags(uint idx, uint part, bool all_parts) const
{
ulong extra_flag= 0;
if (table && idx == table->s->primary_key)
extra_flag= HA_CLUSTERED_INDEX;
return (HA_READ_NEXT |
HA_READ_PREV |
HA_READ_ORDER |
HA_READ_RANGE |
extra_flag |
HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MAX_KEY; }

View File

@@ -2995,12 +2995,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
uint,
uint index,
uint,
bool)
const
{
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
ulong extra_flag= 0;
if (table && index == table->s->primary_key)
extra_flag= HA_CLUSTERED_INDEX;
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY);
}

View File

@@ -3408,12 +3408,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
uint,
uint,
bool)
uint index,
uint part,
bool all_parts)
const
{
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
ulong extra_flag= 0;
if (table && index == table->s->primary_key)
extra_flag= HA_CLUSTERED_INDEX;
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}