mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
use tree for count(distinct) when possible
This commit is contained in:
@ -57,7 +57,7 @@ typedef struct st_tree {
|
||||
void (*free)(void *);
|
||||
} TREE;
|
||||
|
||||
/* Functions on hole tree */
|
||||
/* Functions on whole tree */
|
||||
void init_tree(TREE *tree,uint default_alloc_size, int element_size,
|
||||
qsort_cmp2 compare, my_bool with_delete,
|
||||
void (*free_element)(void*));
|
||||
|
@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size,
|
||||
((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1))))
|
||||
{
|
||||
tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */
|
||||
/* Fix allocation size so that we don't loose any memory */
|
||||
/* Fix allocation size so that we don't lose any memory */
|
||||
default_alloc_size/=(sizeof(TREE_ELEMENT)+size);
|
||||
if (!default_alloc_size)
|
||||
default_alloc_size=1;
|
||||
|
103
sql/item_sum.cc
103
sql/item_sum.cc
@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str)
|
||||
|
||||
#include "sql_select.h"
|
||||
|
||||
/*
  Tree comparison callback for a key that can be compared as raw bytes.

  arg   key length in bytes. The length is carried in the pointer value
        itself (the caller stores an integer cast to void*); the pointer
        is never dereferenced.
  key1  first key
  key2  second key

  Returns the memcmp() result over the first 'arg' bytes of the keys.
*/
static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2)
{
  /*
    Convert through size_t: a direct void* -> int cast loses precision
    (and is rejected by C++ compilers) when pointers are wider than int.
    size_t is also the type memcmp() expects for the length.
  */
  return memcmp(key1, key2, (size_t) arg);
}
|
||||
|
||||
static int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
|
||||
{
|
||||
return my_sortcmp(key1, key2, (int)arg);
|
||||
}
|
||||
|
||||
// did not make this one static - at least gcc gets confused when
|
||||
// I try to declare a static function as a friend. If you can figure
|
||||
// out the syntax to make a static function a friend, make this one
|
||||
// static
|
||||
int composite_key_cmp(void* arg, byte* key1, byte* key2)
|
||||
{
|
||||
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
|
||||
Field** field = item->table->field, **field_end;
|
||||
field_end = field + item->table->fields;
|
||||
for(; field < field_end; ++field)
|
||||
{
|
||||
int res;
|
||||
int len = (*field)->field_length;
|
||||
switch((*field)->type())
|
||||
{
|
||||
case FIELD_TYPE_STRING:
|
||||
case FIELD_TYPE_VAR_STRING:
|
||||
res = my_sortcmp(key1, key2, len);
|
||||
break;
|
||||
default:
|
||||
res = memcmp(key1, key2, len);
|
||||
break;
|
||||
}
|
||||
if(res)
|
||||
return res;
|
||||
key1 += len;
|
||||
key2 += len;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
  Release what the item holds: the temporary table (if one was created),
  the temporary table parameters, and the tree (if it was in use).
*/
Item_sum_count_distinct::~Item_sum_count_distinct()
{
  if (table != 0)
  {
    free_tmp_table(current_thd, table);
  }

  delete tmp_table_param;

  /* The tree is only touched when use_tree is set */
  if (use_tree)
  {
    delete_tree(&tree);
  }
}
|
||||
|
||||
|
||||
@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd)
|
||||
0, 0, current_lex->options | thd->options)))
|
||||
return 1;
|
||||
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
|
||||
|
||||
if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be
|
||||
// MyISAM
|
||||
{
|
||||
qsort_cmp2 compare_key;
|
||||
void* cmp_arg;
|
||||
int key_len;
|
||||
|
||||
if(table->fields == 1) // if we have only one field, which is
|
||||
// the most common use of count(distinct), it is much faster
|
||||
// to use a simpler key compare method that can take advantage
|
||||
// of not having to worry about other fields
|
||||
{
|
||||
switch(table->field[0]->type())
|
||||
{
|
||||
// if we have a string, we must take care of charsets
|
||||
// and case sensitivity
|
||||
case FIELD_TYPE_STRING:
|
||||
case FIELD_TYPE_VAR_STRING:
|
||||
compare_key = (qsort_cmp2)simple_str_key_cmp;
|
||||
break;
|
||||
default: // since at this point we cannot have blobs
|
||||
// anything else can be compared with memcmp
|
||||
compare_key = (qsort_cmp2)simple_raw_key_cmp;
|
||||
break;
|
||||
}
|
||||
cmp_arg = (void*)(key_len = table->field[0]->field_length);
|
||||
rec_offset = 1;
|
||||
}
|
||||
else // too bad, cannot cheat - there is more than one field
|
||||
{
|
||||
cmp_arg = (void*)this;
|
||||
compare_key = (qsort_cmp2)composite_key_cmp;
|
||||
Field** field, **field_end;
|
||||
field_end = (field = table->field) + table->fields;
|
||||
for(key_len = 0; field < field_end; ++field)
|
||||
{
|
||||
key_len += (*field)->field_length;
|
||||
}
|
||||
rec_offset = table->reclength - key_len;
|
||||
}
|
||||
|
||||
init_tree(&tree, 0, key_len, compare_key, 0, 0);
|
||||
tree.cmp_arg = cmp_arg;
|
||||
use_tree = 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset()
|
||||
table->file->extra(HA_EXTRA_NO_CACHE);
|
||||
table->file->delete_all_rows();
|
||||
table->file->extra(HA_EXTRA_WRITE_CACHE);
|
||||
if(use_tree)
|
||||
delete_tree(&tree);
|
||||
(void) add();
|
||||
}
|
||||
|
||||
@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add()
|
||||
if ((*field)->is_real_null(0))
|
||||
return 0; // Don't count NULL
|
||||
|
||||
if ((error=table->file->write_row(table->record[0])))
|
||||
if(use_tree)
|
||||
{
|
||||
if(!tree_insert(&tree, table->record[0] + rec_offset, 0))
|
||||
return 1;
|
||||
}
|
||||
else if ((error=table->file->write_row(table->record[0])))
|
||||
{
|
||||
if (error != HA_ERR_FOUND_DUPP_KEY &&
|
||||
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
||||
@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int()
|
||||
{
|
||||
if (!table) // Empty query
|
||||
return LL(0);
|
||||
if(use_tree)
|
||||
return tree.elements_in_tree;
|
||||
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
|
||||
return table->file->records;
|
||||
}
|
||||
|
@ -21,6 +21,8 @@
|
||||
#pragma interface /* gcc class implementation */
|
||||
#endif
|
||||
|
||||
#include <my_tree.h>
|
||||
|
||||
class Item_sum :public Item_result_field
|
||||
{
|
||||
public:
|
||||
@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int
|
||||
table_map used_table_cache;
|
||||
bool fix_fields(THD *thd,TABLE_LIST *tables);
|
||||
TMP_TABLE_PARAM *tmp_table_param;
|
||||
TREE tree;
|
||||
bool use_tree; // If there are no blobs, we can use a tree, which
|
||||
// is faster than heap table. In that case, we still use the table
|
||||
// to help get things set up, but we insert nothing in it
|
||||
int rec_offset; // the first few bytes of record ( at least one)
|
||||
// are just markers for deleted and NULLs. We want to skip them since
|
||||
// they will just bloat the tree without providing any valuable info
|
||||
|
||||
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
|
||||
|
||||
public:
|
||||
Item_sum_count_distinct(List<Item> &list)
|
||||
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),
|
||||
tmp_table_param(0)
|
||||
tmp_table_param(0),use_tree(0)
|
||||
{ quick_group=0; }
|
||||
~Item_sum_count_distinct();
|
||||
table_map used_tables() const { return used_table_cache; }
|
||||
|
Reference in New Issue
Block a user