mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
use tree for count(distinct) when possible
This commit is contained in:
@ -57,7 +57,7 @@ typedef struct st_tree {
|
|||||||
void (*free)(void *);
|
void (*free)(void *);
|
||||||
} TREE;
|
} TREE;
|
||||||
|
|
||||||
/* Functions on hole tree */
|
/* Functions on whole tree */
|
||||||
void init_tree(TREE *tree,uint default_alloc_size, int element_size,
|
void init_tree(TREE *tree,uint default_alloc_size, int element_size,
|
||||||
qsort_cmp2 compare, my_bool with_delete,
|
qsort_cmp2 compare, my_bool with_delete,
|
||||||
void (*free_element)(void*));
|
void (*free_element)(void*));
|
||||||
|
@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size,
|
|||||||
((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1))))
|
((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1))))
|
||||||
{
|
{
|
||||||
tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */
|
tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */
|
||||||
/* Fix allocation size so that we don't loose any memory */
|
/* Fix allocation size so that we don't lose any memory */
|
||||||
default_alloc_size/=(sizeof(TREE_ELEMENT)+size);
|
default_alloc_size/=(sizeof(TREE_ELEMENT)+size);
|
||||||
if (!default_alloc_size)
|
if (!default_alloc_size)
|
||||||
default_alloc_size=1;
|
default_alloc_size=1;
|
||||||
|
103
sql/item_sum.cc
103
sql/item_sum.cc
@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str)
|
|||||||
|
|
||||||
#include "sql_select.h"
|
#include "sql_select.h"
|
||||||
|
|
||||||
|
/*
  Compare two single-field keys byte by byte.

  arg   Key length in bytes. The length is carried as an integer value
        stored in the pointer itself (see the caller that sets cmp_arg);
        it is not an address and is never dereferenced.
  key1  First key
  key2  Second key

  Returns <0, 0 or >0 exactly as memcmp() does.
*/
static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2)
{
  /*
    Go through size_t instead of casting void* directly to int: the
    direct cast is rejected (or truncates) on platforms where a pointer
    is wider than an int.
  */
  return memcmp(key1, key2, (size_t) arg);
}
|
||||||
|
|
||||||
|
static int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
|
||||||
|
{
|
||||||
|
return my_sortcmp(key1, key2, (int)arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// did not make this one static - at least gcc gets confused when
|
||||||
|
// I try to declare a static function as a friend. If you can figure
|
||||||
|
// out the syntax to make a static function a friend, make this one
|
||||||
|
// static
|
||||||
|
int composite_key_cmp(void* arg, byte* key1, byte* key2)
|
||||||
|
{
|
||||||
|
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
|
||||||
|
Field** field = item->table->field, **field_end;
|
||||||
|
field_end = field + item->table->fields;
|
||||||
|
for(; field < field_end; ++field)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
int len = (*field)->field_length;
|
||||||
|
switch((*field)->type())
|
||||||
|
{
|
||||||
|
case FIELD_TYPE_STRING:
|
||||||
|
case FIELD_TYPE_VAR_STRING:
|
||||||
|
res = my_sortcmp(key1, key2, len);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
res = memcmp(key1, key2, len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(res)
|
||||||
|
return res;
|
||||||
|
key1 += len;
|
||||||
|
key2 += len;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Item_sum_count_distinct::~Item_sum_count_distinct()
|
Item_sum_count_distinct::~Item_sum_count_distinct()
|
||||||
{
|
{
|
||||||
if (table)
|
if (table)
|
||||||
free_tmp_table(current_thd, table);
|
free_tmp_table(current_thd, table);
|
||||||
delete tmp_table_param;
|
delete tmp_table_param;
|
||||||
|
if(use_tree)
|
||||||
|
delete_tree(&tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd)
|
|||||||
0, 0, current_lex->options | thd->options)))
|
0, 0, current_lex->options | thd->options)))
|
||||||
return 1;
|
return 1;
|
||||||
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
|
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
|
||||||
|
|
||||||
|
if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be
|
||||||
|
// MyISAM
|
||||||
|
{
|
||||||
|
qsort_cmp2 compare_key;
|
||||||
|
void* cmp_arg;
|
||||||
|
int key_len;
|
||||||
|
|
||||||
|
if(table->fields == 1) // if we have only one field, which is
|
||||||
|
// the most common use of count(distinct), it is much faster
|
||||||
|
// to use a simpler key compare method that can take advantage
|
||||||
|
// of not having to worry about other fields
|
||||||
|
{
|
||||||
|
switch(table->field[0]->type())
|
||||||
|
{
|
||||||
|
// if we have a string, we must take care of charsets
|
||||||
|
// and case sensitivity
|
||||||
|
case FIELD_TYPE_STRING:
|
||||||
|
case FIELD_TYPE_VAR_STRING:
|
||||||
|
compare_key = (qsort_cmp2)simple_str_key_cmp;
|
||||||
|
break;
|
||||||
|
default: // since at this point we cannot have blobs
|
||||||
|
// anything else can be compared with memcmp
|
||||||
|
compare_key = (qsort_cmp2)simple_raw_key_cmp;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cmp_arg = (void*)(key_len = table->field[0]->field_length);
|
||||||
|
rec_offset = 1;
|
||||||
|
}
|
||||||
|
else // too bad, cannot cheat - there is more than one field
|
||||||
|
{
|
||||||
|
cmp_arg = (void*)this;
|
||||||
|
compare_key = (qsort_cmp2)composite_key_cmp;
|
||||||
|
Field** field, **field_end;
|
||||||
|
field_end = (field = table->field) + table->fields;
|
||||||
|
for(key_len = 0; field < field_end; ++field)
|
||||||
|
{
|
||||||
|
key_len += (*field)->field_length;
|
||||||
|
}
|
||||||
|
rec_offset = table->reclength - key_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
init_tree(&tree, 0, key_len, compare_key, 0, 0);
|
||||||
|
tree.cmp_arg = cmp_arg;
|
||||||
|
use_tree = 1;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset()
|
|||||||
table->file->extra(HA_EXTRA_NO_CACHE);
|
table->file->extra(HA_EXTRA_NO_CACHE);
|
||||||
table->file->delete_all_rows();
|
table->file->delete_all_rows();
|
||||||
table->file->extra(HA_EXTRA_WRITE_CACHE);
|
table->file->extra(HA_EXTRA_WRITE_CACHE);
|
||||||
|
if(use_tree)
|
||||||
|
delete_tree(&tree);
|
||||||
(void) add();
|
(void) add();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add()
|
|||||||
if ((*field)->is_real_null(0))
|
if ((*field)->is_real_null(0))
|
||||||
return 0; // Don't count NULL
|
return 0; // Don't count NULL
|
||||||
|
|
||||||
if ((error=table->file->write_row(table->record[0])))
|
if(use_tree)
|
||||||
|
{
|
||||||
|
if(!tree_insert(&tree, table->record[0] + rec_offset, 0))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
else if ((error=table->file->write_row(table->record[0])))
|
||||||
{
|
{
|
||||||
if (error != HA_ERR_FOUND_DUPP_KEY &&
|
if (error != HA_ERR_FOUND_DUPP_KEY &&
|
||||||
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
||||||
@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int()
|
|||||||
{
|
{
|
||||||
if (!table) // Empty query
|
if (!table) // Empty query
|
||||||
return LL(0);
|
return LL(0);
|
||||||
|
if(use_tree)
|
||||||
|
return tree.elements_in_tree;
|
||||||
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
|
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
|
||||||
return table->file->records;
|
return table->file->records;
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
#pragma interface /* gcc class implementation */
|
#pragma interface /* gcc class implementation */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <my_tree.h>
|
||||||
|
|
||||||
class Item_sum :public Item_result_field
|
class Item_sum :public Item_result_field
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int
|
|||||||
table_map used_table_cache;
|
table_map used_table_cache;
|
||||||
bool fix_fields(THD *thd,TABLE_LIST *tables);
|
bool fix_fields(THD *thd,TABLE_LIST *tables);
|
||||||
TMP_TABLE_PARAM *tmp_table_param;
|
TMP_TABLE_PARAM *tmp_table_param;
|
||||||
|
TREE tree;
|
||||||
|
bool use_tree; // If there are no blobs, we can use a tree, which
|
||||||
|
// is faster than heap table. In that case, we still use the table
|
||||||
|
// to help get things set up, but we insert nothing in it
|
||||||
|
int rec_offset; // the first few bytes of record ( at least one)
|
||||||
|
// are just markers for deleted and NULLs. We want to skip them since
|
||||||
|
// they will just bloat the tree without providing any valuable info
|
||||||
|
|
||||||
|
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Item_sum_count_distinct(List<Item> &list)
|
Item_sum_count_distinct(List<Item> &list)
|
||||||
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),
|
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),
|
||||||
tmp_table_param(0)
|
tmp_table_param(0),use_tree(0)
|
||||||
{ quick_group=0; }
|
{ quick_group=0; }
|
||||||
~Item_sum_count_distinct();
|
~Item_sum_count_distinct();
|
||||||
table_map used_tables() const { return used_table_cache; }
|
table_map used_tables() const { return used_table_cache; }
|
||||||
|
Reference in New Issue
Block a user