mirror of
https://github.com/MariaDB/server.git
synced 2025-08-09 22:24:09 +03:00
Original idea from Zardosht Kasheff to add HA_CLUSTERED_INDEX
- Added a lot of code comments - Updated get_best_ror_intersect() to prefer index scan on non-clustered keys before clustered keys. - Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY - For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() - Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. sql/ha_partition.h: Added comment with warning for code unsafe to use with multiple storage engines at the same time sql/handler.h: Added HA_CLUSTERED_INDEX. Documented primary_key_is_clustered() sql/opt_range.cc: Added code comments Updated get_best_ror_intersect() to ignore clustered keys. Optimized away cpk_scan_used and one instance of current_thd (Simpler code) Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY sql/sql_select.cc: Changed comment to #ifdef For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() (Change is smaller than what it looks because of indentation change) sql/sql_table.cc: Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. storage/innobase/handler/ha_innodb.h: Added support for HA_CLUSTERED_INDEX storage/innodb_plugin/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX storage/xtradb/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX
This commit is contained in:
@@ -877,6 +877,10 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ulong index_flags(uint inx, uint part, bool all_parts) const
|
virtual ulong index_flags(uint inx, uint part, bool all_parts) const
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
The following code is not safe if you are using different
|
||||||
|
storage engines or different index types per partition.
|
||||||
|
*/
|
||||||
return m_file[0]->index_flags(inx, part, all_parts);
|
return m_file[0]->index_flags(inx, part, all_parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -161,8 +161,11 @@
|
|||||||
*/
|
*/
|
||||||
#define HA_KEY_SCAN_NOT_ROR 128
|
#define HA_KEY_SCAN_NOT_ROR 128
|
||||||
#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
|
#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
|
||||||
|
/*
|
||||||
|
Data is clustered on this key. This means that when you read the key
|
||||||
|
you also get the row data in the same block.
|
||||||
|
*/
|
||||||
|
#define HA_CLUSTERED_INDEX 512
|
||||||
|
|
||||||
/*
|
/*
|
||||||
bits in alter_table_flags:
|
bits in alter_table_flags:
|
||||||
@@ -2311,9 +2314,22 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@retval TRUE Primary key (if there is one) is clustered
|
Check if the primary key (if there is one) is a clustered key covering
|
||||||
key covering all fields
|
all fields. This means:
|
||||||
@retval FALSE otherwise
|
|
||||||
|
- Data is stored together with the primary key (no secondary lookup
|
||||||
|
needed to find the row data). The optimizer uses this to find out
|
||||||
|
the cost of fetching data.
|
||||||
|
- The primary key is part of each secondary key and is used
|
||||||
|
to find the row data in the primary index when reading through
|
||||||
|
secondary indexes.
|
||||||
|
- When doing a HA_KEYREAD_ONLY we get also all the primary key parts
|
||||||
|
into the row. This is a critical property used by index_merge.
|
||||||
|
|
||||||
|
For a clustered primary key, index_flags() returns also HA_CLUSTERED_INDEX
|
||||||
|
|
||||||
|
@retval TRUE yes
|
||||||
|
@retval FALSE No.
|
||||||
*/
|
*/
|
||||||
virtual bool primary_key_is_clustered() { return FALSE; }
|
virtual bool primary_key_is_clustered() { return FALSE; }
|
||||||
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
|
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
|
||||||
|
@@ -1816,6 +1816,12 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
|
|||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
QUICK_INDEX_SORT_SELECT works as follows:
|
||||||
|
- Do index scans, accumulate rowids in the Unique object
|
||||||
|
(Unique will also sort and de-duplicate rowids)
|
||||||
|
- Use rowids from unique to run a disk-ordered sweep
|
||||||
|
*/
|
||||||
|
|
||||||
QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
|
QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
|
||||||
TABLE *table)
|
TABLE *table)
|
||||||
@@ -1848,7 +1854,18 @@ QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
|
|||||||
if (head->file->primary_key_is_clustered() &&
|
if (head->file->primary_key_is_clustered() &&
|
||||||
quick_sel_range->index == head->s->primary_key)
|
quick_sel_range->index == head->s->primary_key)
|
||||||
{
|
{
|
||||||
/* A quick_select over a clustered primary key is handled specifically */
|
/*
|
||||||
|
A quick_select over a clustered primary key is handled specifically
|
||||||
|
Here we assume:
|
||||||
|
- PK columns are included in any other merged index
|
||||||
|
- Scan on the PK is disk-ordered.
|
||||||
|
(not meeting #2 will only cause performance degradation)
|
||||||
|
|
||||||
|
We could treat clustered PK as any other index, but that would
|
||||||
|
be inefficient. There is no point in doing scan on
|
||||||
|
CPK, remembering the rowid, then making rnd_pos() call with
|
||||||
|
that rowid.
|
||||||
|
*/
|
||||||
pk_quick_select= quick_sel_range;
|
pk_quick_select= quick_sel_range;
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
@@ -4298,11 +4315,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
|
|||||||
DBUG_ENTER("get_sweep_read_cost");
|
DBUG_ENTER("get_sweep_read_cost");
|
||||||
if (param->table->file->primary_key_is_clustered())
|
if (param->table->file->primary_key_is_clustered())
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
We are using the primary key to find the rows.
|
||||||
|
Calculate the cost for this.
|
||||||
|
*/
|
||||||
result= param->table->file->read_time(param->table->s->primary_key,
|
result= param->table->file->read_time(param->table->s->primary_key,
|
||||||
(uint)records, records);
|
(uint)records, records);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
Rows will be retrieved with rnd_pos(). Calculate the expected
|
||||||
|
cost for this.
|
||||||
|
*/
|
||||||
double n_blocks=
|
double n_blocks=
|
||||||
ceil(ulonglong2double(param->table->file->stats.data_file_length) /
|
ceil(ulonglong2double(param->table->file->stats.data_file_length) /
|
||||||
IO_SIZE);
|
IO_SIZE);
|
||||||
@@ -6187,7 +6212,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
|
|||||||
ROR_SCAN_INFO **cur_ror_scan;
|
ROR_SCAN_INFO **cur_ror_scan;
|
||||||
ROR_SCAN_INFO *cpk_scan= NULL;
|
ROR_SCAN_INFO *cpk_scan= NULL;
|
||||||
uint cpk_no;
|
uint cpk_no;
|
||||||
bool cpk_scan_used= FALSE;
|
|
||||||
|
|
||||||
if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
|
if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
|
||||||
sizeof(ROR_SCAN_INFO*)*
|
sizeof(ROR_SCAN_INFO*)*
|
||||||
@@ -6199,11 +6223,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
|
|||||||
for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
|
for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
|
||||||
{
|
{
|
||||||
ROR_SCAN_INFO *scan;
|
ROR_SCAN_INFO *scan;
|
||||||
|
uint key_no;
|
||||||
if (!tree->ror_scans_map.is_set(idx))
|
if (!tree->ror_scans_map.is_set(idx))
|
||||||
continue;
|
continue;
|
||||||
|
key_no= param->real_keynr[idx];
|
||||||
|
if (key_no != cpk_no &&
|
||||||
|
param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
|
||||||
|
{
|
||||||
|
/* Ignore clustering keys */
|
||||||
|
tree->n_ror_scans--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
|
if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (param->real_keynr[idx] == cpk_no)
|
if (key_no == cpk_no)
|
||||||
{
|
{
|
||||||
cpk_scan= scan;
|
cpk_scan= scan;
|
||||||
tree->n_ror_scans--;
|
tree->n_ror_scans--;
|
||||||
@@ -6289,15 +6322,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
|
|||||||
{
|
{
|
||||||
if (ror_intersect_add(intersect, cpk_scan, TRUE) &&
|
if (ror_intersect_add(intersect, cpk_scan, TRUE) &&
|
||||||
(intersect->total_cost < min_cost))
|
(intersect->total_cost < min_cost))
|
||||||
{
|
|
||||||
cpk_scan_used= TRUE;
|
|
||||||
intersect_best= intersect; //just set pointer here
|
intersect_best= intersect; //just set pointer here
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
|
cpk_scan= 0; // Don't use cpk_scan
|
||||||
|
|
||||||
/* Ok, return ROR-intersect plan if we have found one */
|
/* Ok, return ROR-intersect plan if we have found one */
|
||||||
TRP_ROR_INTERSECT *trp= NULL;
|
TRP_ROR_INTERSECT *trp= NULL;
|
||||||
if (min_cost < read_time && (cpk_scan_used || best_num > 1))
|
if (min_cost < read_time && (cpk_scan || best_num > 1))
|
||||||
{
|
{
|
||||||
if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
|
if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
|
||||||
DBUG_RETURN(trp);
|
DBUG_RETURN(trp);
|
||||||
@@ -6316,7 +6348,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
|
|||||||
set_if_smaller(param->table->quick_condition_rows, best_rows);
|
set_if_smaller(param->table->quick_condition_rows, best_rows);
|
||||||
trp->records= best_rows;
|
trp->records= best_rows;
|
||||||
trp->index_scan_costs= intersect_best->index_scan_costs;
|
trp->index_scan_costs= intersect_best->index_scan_costs;
|
||||||
trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
|
trp->cpk_scan= cpk_scan;
|
||||||
DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
|
DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
|
||||||
"cost %g, records %lu",
|
"cost %g, records %lu",
|
||||||
trp->read_cost, (ulong) trp->records));
|
trp->read_cost, (ulong) trp->records));
|
||||||
@@ -9511,10 +9543,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
|
|||||||
bool pk_is_clustered= file->primary_key_is_clustered();
|
bool pk_is_clustered= file->primary_key_is_clustered();
|
||||||
if (index_only &&
|
if (index_only &&
|
||||||
(file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
|
(file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
|
||||||
!(pk_is_clustered && keynr == param->table->s->primary_key))
|
!(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
|
||||||
*mrr_flags |= HA_MRR_INDEX_ONLY;
|
*mrr_flags |= HA_MRR_INDEX_ONLY;
|
||||||
|
|
||||||
if (current_thd->lex->sql_command != SQLCOM_SELECT)
|
if (param->thd->lex->sql_command != SQLCOM_SELECT)
|
||||||
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
|
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
|
||||||
|
|
||||||
*bufsize= param->thd->variables.mrr_buff_size;
|
*bufsize= param->thd->variables.mrr_buff_size;
|
||||||
|
@@ -8550,17 +8550,19 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
|
|||||||
else if (!table->covering_keys.is_clear_all() &&
|
else if (!table->covering_keys.is_clear_all() &&
|
||||||
!(tab->select && tab->select->quick))
|
!(tab->select && tab->select->quick))
|
||||||
{ // Only read index tree
|
{ // Only read index tree
|
||||||
|
#ifdef BAD_OPTIMIZATION
|
||||||
/*
|
/*
|
||||||
It has turned out that the below change, while speeding things
|
It has turned out that the below change, while speeding things
|
||||||
up for disk-bound loads, slows them down for cases when the data
|
up for disk-bound loads, slows them down for cases when the data
|
||||||
is in disk cache (see BUG#35850):
|
is in disk cache (see BUG#35850):
|
||||||
// See bug #26447: "Using the clustered index for a table scan
|
See bug #26447: "Using the clustered index for a table scan
|
||||||
// is always faster than using a secondary index".
|
is always faster than using a secondary index".
|
||||||
|
*/
|
||||||
if (table->s->primary_key != MAX_KEY &&
|
if (table->s->primary_key != MAX_KEY &&
|
||||||
table->file->primary_key_is_clustered())
|
table->file->primary_key_is_clustered())
|
||||||
tab->index= table->s->primary_key;
|
tab->index= table->s->primary_key;
|
||||||
else
|
else
|
||||||
*/
|
#endif
|
||||||
tab->index=find_shortest_key(table, & table->covering_keys);
|
tab->index=find_shortest_key(table, & table->covering_keys);
|
||||||
tab->read_first_record= join_read_first;
|
tab->read_first_record= join_read_first;
|
||||||
/* Read with index_first / index_next */
|
/* Read with index_first / index_next */
|
||||||
@@ -16525,9 +16527,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
|||||||
*/
|
*/
|
||||||
DBUG_ASSERT (ref_key != (int) nr);
|
DBUG_ASSERT (ref_key != (int) nr);
|
||||||
|
|
||||||
bool is_covering= table->covering_keys.is_set(nr) ||
|
bool is_covering= (table->covering_keys.is_set(nr) ||
|
||||||
(nr == table->s->primary_key &&
|
(table->file->index_flags(nr, 0, 1) &
|
||||||
table->file->primary_key_is_clustered());
|
HA_CLUSTERED_INDEX));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Don't use an index scan with ORDER BY without limit.
|
Don't use an index scan with ORDER BY without limit.
|
||||||
@@ -16680,17 +16682,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
|||||||
/*
|
/*
|
||||||
filesort() and join cache are usually faster than reading in
|
filesort() and join cache are usually faster than reading in
|
||||||
index order and not using join cache, except in case that chosen
|
index order and not using join cache, except in case that chosen
|
||||||
index is clustered primary key.
|
index is clustered key.
|
||||||
*/
|
*/
|
||||||
if ((select_limit >= table_records) &&
|
if (best_key < 0 ||
|
||||||
|
((select_limit >= table_records) &&
|
||||||
(tab->type == JT_ALL &&
|
(tab->type == JT_ALL &&
|
||||||
tab->join->tables > tab->join->const_tables + 1) &&
|
tab->join->tables > tab->join->const_tables + 1) &&
|
||||||
((unsigned) best_key != table->s->primary_key ||
|
!(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
|
||||||
!table->file->primary_key_is_clustered()))
|
|
||||||
goto use_filesort;
|
goto use_filesort;
|
||||||
|
|
||||||
if (best_key >= 0)
|
|
||||||
{
|
|
||||||
if (table->quick_keys.is_set(best_key) && best_key != ref_key)
|
if (table->quick_keys.is_set(best_key) && best_key != ref_key)
|
||||||
{
|
{
|
||||||
key_map map;
|
key_map map;
|
||||||
@@ -16713,9 +16713,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
|||||||
used_key_parts= (order_direction == -1) ?
|
used_key_parts= (order_direction == -1) ?
|
||||||
saved_best_key_parts : best_key_parts;
|
saved_best_key_parts : best_key_parts;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
goto use_filesort;
|
|
||||||
}
|
|
||||||
|
|
||||||
check_reverse_order:
|
check_reverse_order:
|
||||||
DBUG_ASSERT(order_direction != 0);
|
DBUG_ASSERT(order_direction != 0);
|
||||||
|
@@ -7983,7 +7983,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
|
|||||||
|
|
||||||
if (order)
|
if (order)
|
||||||
{
|
{
|
||||||
if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
|
if (to->s->primary_key != MAX_KEY &&
|
||||||
|
to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
|
||||||
{
|
{
|
||||||
char warn_buff[MYSQL_ERRMSG_SIZE];
|
char warn_buff[MYSQL_ERRMSG_SIZE];
|
||||||
my_snprintf(warn_buff, sizeof(warn_buff),
|
my_snprintf(warn_buff, sizeof(warn_buff),
|
||||||
|
@@ -98,10 +98,14 @@ class ha_innobase: public handler
|
|||||||
Table_flags table_flags() const;
|
Table_flags table_flags() const;
|
||||||
ulong index_flags(uint idx, uint part, bool all_parts) const
|
ulong index_flags(uint idx, uint part, bool all_parts) const
|
||||||
{
|
{
|
||||||
|
ulong extra_flag= 0;
|
||||||
|
if (table && idx == table->s->primary_key)
|
||||||
|
extra_flag= HA_CLUSTERED_INDEX;
|
||||||
return (HA_READ_NEXT |
|
return (HA_READ_NEXT |
|
||||||
HA_READ_PREV |
|
HA_READ_PREV |
|
||||||
HA_READ_ORDER |
|
HA_READ_ORDER |
|
||||||
HA_READ_RANGE |
|
HA_READ_RANGE |
|
||||||
|
extra_flag |
|
||||||
HA_KEYREAD_ONLY);
|
HA_KEYREAD_ONLY);
|
||||||
}
|
}
|
||||||
uint max_supported_keys() const { return MAX_KEY; }
|
uint max_supported_keys() const { return MAX_KEY; }
|
||||||
|
@@ -2995,12 +2995,15 @@ UNIV_INTERN
|
|||||||
ulong
|
ulong
|
||||||
ha_innobase::index_flags(
|
ha_innobase::index_flags(
|
||||||
/*=====================*/
|
/*=====================*/
|
||||||
uint,
|
uint index,
|
||||||
uint,
|
uint,
|
||||||
bool)
|
bool)
|
||||||
const
|
const
|
||||||
{
|
{
|
||||||
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
|
ulong extra_flag= 0;
|
||||||
|
if (table && index == table->s->primary_key)
|
||||||
|
extra_flag= HA_CLUSTERED_INDEX;
|
||||||
|
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
|
||||||
| HA_READ_RANGE | HA_KEYREAD_ONLY);
|
| HA_READ_RANGE | HA_KEYREAD_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3408,12 +3408,15 @@ UNIV_INTERN
|
|||||||
ulong
|
ulong
|
||||||
ha_innobase::index_flags(
|
ha_innobase::index_flags(
|
||||||
/*=====================*/
|
/*=====================*/
|
||||||
uint,
|
uint index,
|
||||||
uint,
|
uint part,
|
||||||
bool)
|
bool all_parts)
|
||||||
const
|
const
|
||||||
{
|
{
|
||||||
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
|
ulong extra_flag= 0;
|
||||||
|
if (table && index == table->s->primary_key)
|
||||||
|
extra_flag= HA_CLUSTERED_INDEX;
|
||||||
|
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
|
||||||
| HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
|
| HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user