Move JSON histograms code into its own files
sql/CMakeLists.txt
@@ -151,6 +151,7 @@ SET (SQL_SOURCE
                sql_analyze_stmt.cc
                sql_join_cache.cc
                create_options.cc multi_range_read.cc
+               opt_histogram_json.cc
                opt_index_cond_pushdown.cc opt_subselect.cc
                opt_table_elimination.cc sql_expression_cache.cc
                gcalc_slicescan.cc gcalc_tools.cc
sql/opt_histogram_json.cc (new file, 391 lines)
@@ -0,0 +1,391 @@
/*
   Copyright (c) 2021, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */

#include "mariadb.h"
#include "sql_base.h"
#include "my_json_writer.h"
#include "sql_statistics.h"
#include "opt_histogram_json.h"


class Histogram_json_builder : public Histogram_builder
{
  Histogram_json_hb *histogram;
  uint hist_width;        /* the number of points in the histogram       */
  double bucket_capacity; /* number of rows in a bucket of the histogram */
  uint curr_bucket;       /* number of the current bucket to be built    */

  std::vector<std::string> bucket_bounds;
  bool first_value= true;
public:

  Histogram_json_builder(Histogram_json_hb *hist, Field *col, uint col_len,
                         ha_rows rows)
    : Histogram_builder(col, col_len, rows), histogram(hist)
  {
    bucket_capacity= (double)records / histogram->get_width();
    hist_width= histogram->get_width();
    curr_bucket= 0;
  }

  ~Histogram_json_builder() override = default;

  /*
    @brief
      Add data to the histogram. This call adds elem_cnt rows, each
      of which has value of *elem.

    @detail
      Subsequent next() calls will add values that are greater than *elem.
  */
  int next(void *elem, element_count elem_cnt) override
  {
    counters.next(elem, elem_cnt);
    ulonglong count= counters.get_count();

    if (curr_bucket == hist_width)
      return 0;
    if (first_value)
    {
      first_value= false;
      column->store_field_value((uchar*) elem, col_length);
      StringBuffer<MAX_FIELD_WIDTH> val;
      column->val_str(&val);
      bucket_bounds.push_back(std::string(val.ptr(), val.length()));
    }

    if (count > bucket_capacity * (curr_bucket + 1))
    {
      column->store_field_value((uchar*) elem, col_length);
      StringBuffer<MAX_FIELD_WIDTH> val;
      column->val_str(&val);
      bucket_bounds.emplace_back(val.ptr(), val.length());

      curr_bucket++;
      while (curr_bucket != hist_width &&
             count > bucket_capacity * (curr_bucket + 1))
      {
        bucket_bounds.push_back(std::string(val.ptr(), val.length()));
        curr_bucket++;
      }
    }

    if (records == count && bucket_bounds.size() == hist_width)
    {
      column->store_field_value((uchar*) elem, col_length);
      StringBuffer<MAX_FIELD_WIDTH> val;
      column->val_str(&val);
      bucket_bounds.push_back(std::string(val.ptr(), val.length()));
    }
    return 0;
  }

  /*
    @brief
      Finalize the creation of histogram
  */
  void finalize() override
  {
    Json_writer writer;
    writer.start_object();
    writer.add_member(Histogram_json_hb::JSON_NAME).start_array();

    for(auto& value: bucket_bounds) {
      writer.add_str(value.c_str());
    }
    writer.end_array();
    writer.end_object();
    Binary_string *json_string= (Binary_string *) writer.output.get_string();
    histogram->set_json_text(bucket_bounds.size()-1,
                             (uchar *) json_string->c_ptr());
  }
};


Histogram_builder *Histogram_json_hb::create_builder(Field *col, uint col_len,
                                                     ha_rows rows)
{
  return new Histogram_json_builder(this, col, col_len, rows);
}
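Editor's note (not part of the patch): the bucket-boundary rule in Histogram_json_builder::next() above is easiest to see on a toy data set. The self-contained sketch below uses made-up values and plain std::string in place of Field/StringBuffer, applies the same equi-height rule, and prints the bounds in the JSON shape that finalize() writes.

// Editor's illustration only -- toy values, not MariaDB code.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main()
{
  // (value, occurrence count) pairs in ascending value order -- the same
  // stream of next(elem, elem_cnt) calls the builder receives.
  std::vector<std::pair<std::string, unsigned long long>> data=
    {{"a",3}, {"b",1}, {"c",5}, {"d",2}, {"e",4}, {"f",1}, {"g",4}};

  unsigned long long records= 0;
  for (auto &p : data)
    records+= p.second;                        // 20 rows in total

  unsigned hist_width= 4;                      // number of buckets
  double bucket_capacity= (double)records / hist_width;  // 5 rows per bucket

  std::vector<std::string> bounds;
  unsigned long long count= 0;
  unsigned curr_bucket= 0;
  bool first_value= true;

  for (auto &p : data)
  {
    count+= p.second;
    if (curr_bucket == hist_width)
      break;
    if (first_value)                           // overall minimum
    {
      first_value= false;
      bounds.push_back(p.first);
    }
    if (count > bucket_capacity * (curr_bucket + 1))
    {                                          // close the current bucket
      bounds.push_back(p.first);
      curr_bucket++;
      while (curr_bucket != hist_width &&
             count > bucket_capacity * (curr_bucket + 1))
      {                                        // a very frequent value may close several buckets
        bounds.push_back(p.first);
        curr_bucket++;
      }
    }
    if (records == count && bounds.size() == hist_width)
      bounds.push_back(p.first);               // overall maximum
  }

  // Print in the shape finalize() produces: n_buckets + 1 bounds.
  printf("{\"histogram_hb_v1\": [");
  for (size_t i= 0; i < bounds.size(); i++)
    printf("%s\"%s\"", i ? ", " : "", bounds[i].c_str());
  printf("]}\n");   // {"histogram_hb_v1": ["a", "c", "d", "f", "g"]}
  return 0;
}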
void Histogram_json_hb::init_for_collection(MEM_ROOT *mem_root,
                                            Histogram_type htype_arg,
                                            ulonglong size_arg)
{
  DBUG_ASSERT(htype_arg == JSON_HB);
  size= (uint8) size_arg;
}


/*
  @brief
    Parse the histogram from its on-disk representation

  @return
     false  OK
     True   Error
*/

bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
                              Histogram_type type_arg, const char *hist_data,
                              size_t hist_data_len)
{
  DBUG_ENTER("Histogram_json_hb::parse");
  DBUG_ASSERT(type_arg == JSON_HB);
  const char *err;
  json_engine_t je;
  json_string_t key_name;

  json_scan_start(&je, &my_charset_utf8mb4_bin,
                  (const uchar*)hist_data,
                  (const uchar*)hist_data+hist_data_len);

  if (json_read_value(&je) || je.value_type != JSON_VALUE_OBJECT)
  {
    err= "Root JSON element must be a JSON object";
    goto error;
  }

  json_string_set_str(&key_name, (const uchar*)JSON_NAME,
                      (const uchar*)JSON_NAME + strlen(JSON_NAME));
  json_string_set_cs(&key_name, system_charset_info);

  if (json_scan_next(&je) || je.state != JST_KEY ||
      !json_key_matches(&je, &key_name))
  {
    err= "The first key in the object must be histogram_hb_v1";
    goto error;
  }

  // The value must be a JSON array
  if (json_read_value(&je) || (je.value_type != JSON_VALUE_ARRAY))
  {
    err= "A JSON array expected";
    goto error;
  }

  // Read the array
  while (!json_scan_next(&je))
  {
    switch(je.state)
    {
      case JST_VALUE:
      {
        const char *val;
        int val_len;
        json_smart_read_value(&je, &val, &val_len);
        if (je.value_type != JSON_VALUE_STRING &&
            je.value_type != JSON_VALUE_NUMBER &&
            je.value_type != JSON_VALUE_TRUE &&
            je.value_type != JSON_VALUE_FALSE)
        {
          err= "Scalar value expected";
          goto error;
        }
        uchar buf[MAX_KEY_LENGTH];
        uint len_to_copy= field->key_length();
        field->store_text(val, val_len, &my_charset_bin);
        uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
        histogram_bounds.push_back(std::string((char*)buf, bytes));
        // TODO: Should we also compare this endpoint with the previous
        // to verify that the ordering is right?
        break;
      }
      case JST_ARRAY_END:
        break;
    }
  }
  // n_buckets = n_bounds - 1 :
  size= histogram_bounds.size()-1;
  DBUG_RETURN(false);

error:
  my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err,
           je.s.c_str - (const uchar*)hist_data);
  DBUG_RETURN(true);
}


static
void store_key_image_to_rec_no_null(Field *field, const uchar *ptr)
{
  MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
                                               &field->table->write_set);
  field->set_key_image(ptr, field->key_length());
  dbug_tmp_restore_column_map(&field->table->write_set, old_map);
}


static
double position_in_interval(Field *field, const uchar *key,
                            const std::string& left, const std::string& right)
{
  double res;
  if (field->pos_through_val_str())
  {
    uint32 min_len= uint2korr(left.data());
    uint32 max_len= uint2korr(right.data());
    uint32 midp_len= uint2korr(key);

    res= pos_in_interval_for_string(field->charset(),
                                    key + HA_KEY_BLOB_LENGTH,
                                    midp_len,
                                    (const uchar*)left.data() + HA_KEY_BLOB_LENGTH,
                                    min_len,
                                    (const uchar*)right.data() + HA_KEY_BLOB_LENGTH,
                                    max_len);
  }
  else
  {
    store_key_image_to_rec_no_null(field, (const uchar*)left.data());
    double min_val_real= field->val_real();

    store_key_image_to_rec_no_null(field, (const uchar*)right.data());
    double max_val_real= field->val_real();

    store_key_image_to_rec_no_null(field, key);
    double midp_val_real= field->val_real();

    res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);
  }
  return res;
}


double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
                                            double avg_sel)
{
  double sel;
  store_key_image_to_rec(field, (uchar *) endpoint->key,
                         field->key_length());
  const uchar *min_key = endpoint->key;
  if (field->real_maybe_null())
    min_key++;
  uint min_idx= find_bucket(field, min_key, false);

  uint max_idx= find_bucket(field, min_key, true);
#if 0
  // find how many buckets this value occupies
  while ((max_idx + 1 < get_width() ) &&
         (field->key_cmp((uchar *)histogram_bounds[max_idx + 1].data(), min_key) == 0)) {
    max_idx++;
  }
#endif
  if (max_idx > min_idx)
  {
    // value spans multiple buckets
    double bucket_sel= 1.0/(get_width() + 1);
    sel= bucket_sel * (max_idx - min_idx + 1);
  }
  else
  {
    // the value fits within a single bucket
    sel = MY_MIN(avg_sel, 1.0/get_width());
  }
  return sel;
}


/*
  @param field    The table field histogram is for.  We don't care about the
                  field's current value, we only need its virtual functions to
                  perform various operations

  @param min_endp Left endpoint, or NULL if there is none
  @param max_endp Right endpoint, or NULL if there is none
*/

double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
                                            key_range *max_endp)
{
  double min, max;
  double width= 1.0 / histogram_bounds.size();

  if (min_endp && !(field->null_ptr && min_endp->key[0]))
  {
    bool exclusive_endp= (min_endp->flag == HA_READ_AFTER_KEY)? true: false;
    const uchar *min_key= min_endp->key;
    if (field->real_maybe_null())
      min_key++;

    // Find the leftmost bucket that contains the lookup value.
    // (If the lookup value is to the left of all buckets, find bucket #0)
    int idx= find_bucket(field, min_key, exclusive_endp);
    double min_sel= position_in_interval(field, (const uchar*)min_key,
                                         histogram_bounds[idx],
                                         histogram_bounds[idx+1]);
    min= idx*width + min_sel*width;
  }
  else
    min= 0.0;

  if (max_endp)
  {
    // The right endpoint cannot be NULL
    DBUG_ASSERT(!(field->null_ptr && max_endp->key[0]));
    bool inclusive_endp= (max_endp->flag == HA_READ_AFTER_KEY)? true: false;
    const uchar *max_key= max_endp->key;
    if (field->real_maybe_null())
      max_key++;

    int idx= find_bucket(field, max_key, inclusive_endp);
    double max_sel= position_in_interval(field, (const uchar*)max_key,
                                         histogram_bounds[idx],
                                         histogram_bounds[idx+1]);
    max= idx*width + max_sel*width;
  }
  else
    max= 1.0;

  double sel = max - min;
  return sel;
}


void Histogram_json_hb::serialize(Field *field)
{
  field->store(json_text.data(), json_text.size(), &my_charset_bin);
}


/*
  Find the histogram bucket that contains the value.

  @param equal_is_less Controls what to do if a histogram bound is equal to the
                       lookup_val.
*/

int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
                                   bool equal_is_less)
{
  int low= 0;
  int high= histogram_bounds.size() - 1;
  int middle;

  while (low + 1 < high)
  {
    middle= (low + high) / 2;
    int res= field->key_cmp((uchar*)histogram_bounds[middle].data(), lookup_val);
    if (!res)
      res= equal_is_less? -1: 1;
    if (res < 0)
      low= middle;
    else //res > 0
      high= middle;
  }

  return low;
}
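Editor's note (not part of the patch): find_bucket() above returns the index of the bucket whose [bounds[i], bounds[i+1]) interval holds the lookup value, and equal_is_less decides which side a value that exactly equals a bound falls on; point_selectivity() calls it once with each setting to get the first and last bucket a value can occupy. A self-contained sketch with made-up string bounds, using std::string::compare() in place of Field::key_cmp():

// Editor's illustration only -- toy values, not MariaDB code.
#include <cstdio>
#include <string>
#include <vector>

// Same binary search as Histogram_json_hb::find_bucket().
static int find_bucket(const std::vector<std::string> &bounds,
                       const std::string &lookup_val, bool equal_is_less)
{
  int low= 0;
  int high= (int)bounds.size() - 1;
  while (low + 1 < high)
  {
    int middle= (low + high) / 2;
    int res= bounds[middle].compare(lookup_val);
    if (!res)
      res= equal_is_less ? -1 : 1;
    if (res < 0)
      low= middle;
    else
      high= middle;
  }
  return low;
}

int main()
{
  // 5 bounds = 4 buckets: ["a","c"), ["c","d"), ["d","f"), ["f","g"]
  std::vector<std::string> bounds= {"a", "c", "d", "f", "g"};

  printf("%d\n", find_bucket(bounds, "e", false)); // 2: "e" lies in ["d","f")
  // A lookup that equals the bound "d":
  printf("%d\n", find_bucket(bounds, "d", false)); // 1: the bound counts as greater, search goes left
  printf("%d\n", find_bucket(bounds, "d", true));  // 2: the bound counts as less, search goes right
  return 0;
}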
sql/opt_histogram_json.h (new file, 95 lines)
@@ -0,0 +1,95 @@
/*
   Copyright (c) 2021, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */

#include "sql_statistics.h"

/*
  An equi-height histogram which stores real values for bucket bounds.

  Handles @@histogram_type=JSON_HB
*/

class Histogram_json_hb : public Histogram_base
{
  size_t size; /* Number of elements in the histogram */

  /* Collection-time only: collected histogram in the JSON form. */
  std::string json_text;

  // Array of histogram bucket endpoints in KeyTupleFormat.
  std::vector<std::string> histogram_bounds;

public:
  static constexpr const char* JSON_NAME="histogram_hb_v1";

  bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
             const char *hist_data, size_t hist_data_len) override;

  void serialize(Field *field) override;

  Histogram_builder *create_builder(Field *col, uint col_len,
                                    ha_rows rows) override;

  // returns number of buckets in the histogram
  uint get_width() override
  {
    return (uint)size;
  }

  Histogram_type get_type() override
  {
    return JSON_HB;
  }

  /*
    @brief
      Legacy: this returns the size of the histogram on disk.

    @detail
      This is only called at collection time when json_text is non-empty.
  */
  uint get_size() override
  {
    return json_text.size();
  }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  bool is_available() override {return true; }

  bool is_usable(THD *thd) override
  {
    return thd->variables.optimizer_use_condition_selectivity > 3 &&
           is_available();
  }

  double point_selectivity(Field *field, key_range *endpoint,
                           double avg_selection) override;
  double range_selectivity(Field *field, key_range *min_endp,
                           key_range *max_endp) override;

  void set_json_text(ulonglong sz, uchar *json_text_arg)
  {
    size = (uint8) sz;
    json_text.assign((const char*)json_text_arg,
                     strlen((const char*)json_text_arg));
  }

private:
  int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
};
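Editor's note (not part of the patch): range_selectivity() in the new .cc file above treats each stored bound interval as holding an equal share of the rows: with n stored bounds, width = 1.0/n, and each endpoint maps to bucket_index*width plus its interpolated fraction within that bucket (from position_in_interval()). A self-contained sketch of that arithmetic with made-up numbers:

// Editor's illustration only -- toy numbers, not MariaDB code.
#include <cstdio>

int main()
{
  double width= 1.0 / 5;   // histogram_bounds.size() == 5, i.e. 4 buckets

  // Lower endpoint: falls in bucket #1, 30% of the way through it.
  double min= 1*width + 0.30*width;

  // Upper endpoint: falls in bucket #3, 50% of the way through it.
  double max= 3*width + 0.50*width;

  printf("estimated selectivity = %.3f\n", max - min);  // 0.700 - 0.260 = 0.440
  return 0;
}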
sql/sql_statistics.cc
@@ -28,11 +28,11 @@
 #include "sql_base.h"
 #include "key.h"
 #include "sql_statistics.h"
+#include "opt_histogram_json.h"
 #include "opt_range.h"
 #include "uniques.h"
 #include "sql_show.h"
 #include "sql_partition.h"
-#include "my_json_writer.h"
 
 #include <vector>
 #include <string>
sql/sql_statistics.cc
@@ -1276,273 +1276,6 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
 }
 
[267 removed lines, elided here: the definitions of
 Histogram_json_hb::init_for_collection(), Histogram_json_hb::parse(),
 Histogram_json_hb::point_selectivity(), store_key_image_to_rec_no_null(),
 position_in_interval(), Histogram_json_hb::range_selectivity(),
 Histogram_json_hb::serialize() and Histogram_json_hb::find_bucket().
 They reappear unchanged in the new sql/opt_histogram_json.cc shown above.]
 
 /*
   An object of the class Index_stat is created to read statistical
   data on tables from the statistical table table_stat, to update
sql/sql_statistics.cc
@@ -1853,73 +1586,6 @@ public:
   }
 };
 
[67 removed lines, elided here: the Basic_stats_collector and
 Histogram_builder class definitions. They were moved, unchanged, into
 sql/sql_statistics.h; see the @@ -353,75 +367,72 @@ hunk below.]
 
 class Histogram_binary_builder : public Histogram_builder
 {
   Field *min_value;        /* pointer to the minimal value for the field   */
sql/sql_statistics.cc
@@ -1974,101 +1640,6 @@ Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len,
 }
 
 
-class Histogram_json_builder : public Histogram_builder
-{
-  Histogram_json_hb *histogram;
-  uint hist_width;        /* the number of points in the histogram       */
-  double bucket_capacity; /* number of rows in a bucket of the histogram */
-  uint curr_bucket;       /* number of the current bucket to be built    */
-
-  std::vector<std::string> bucket_bounds;
-  bool first_value= true;
-public:
-  Histogram_json_builder(Field *col, uint col_len, ha_rows rows)
-    : Histogram_builder(col, col_len, rows)
-  {
-    histogram= (Histogram_json_hb*)col->collected_stats->histogram;
-    bucket_capacity= (double)records / histogram->get_width();
-    hist_width= histogram->get_width();
-    curr_bucket= 0;
-  }
-
-  ~Histogram_json_builder() override = default;
-
-  /*
-    Add data to the histogram. Adding Element elem which encountered elem_cnt
-    times.
-  */
-  int next(void *elem, element_count elem_cnt) override
-  {
-    counters.next(elem, elem_cnt);
-    ulonglong count= counters.get_count();
-
-    if (curr_bucket == hist_width)
-      return 0;
-    if (first_value)
-    {
-      first_value= false;
-      column->store_field_value((uchar*) elem, col_length);
-      StringBuffer<MAX_FIELD_WIDTH> val;
-      column->val_str(&val);
-      bucket_bounds.push_back(std::string(val.ptr(), val.length()));
-    }
-
-    if (count > bucket_capacity * (curr_bucket + 1))
-    {
-      column->store_field_value((uchar*) elem, col_length);
-      StringBuffer<MAX_FIELD_WIDTH> val;
-      column->val_str(&val);
-      bucket_bounds.emplace_back(val.ptr(), val.length());
-
-      curr_bucket++;
-      while (curr_bucket != hist_width &&
-             count > bucket_capacity * (curr_bucket + 1))
-      {
-        bucket_bounds.push_back(std::string(val.ptr(), val.length()));
-        curr_bucket++;
-      }
-    }
-
-    if (records == count && bucket_bounds.size() == hist_width)
-    {
-      column->store_field_value((uchar*) elem, col_length);
-      StringBuffer<MAX_FIELD_WIDTH> val;
-      column->val_str(&val);
-      bucket_bounds.push_back(std::string(val.ptr(), val.length()));
-    }
-    return 0;
-  }
-
-  /*
-    Finalize the creation of histogram
-  */
-  void finalize() override
-  {
-    Json_writer writer;
-    writer.start_object();
-    writer.add_member(Histogram_json_hb::JSON_NAME).start_array();
-
-    for(auto& value: bucket_bounds) {
-      writer.add_str(value.c_str());
-    }
-    writer.end_array();
-    writer.end_object();
-    Binary_string *json_string = (Binary_string *) writer.output.get_string();
-    histogram->set_json_text(bucket_bounds.size()-1,
-                             (uchar *) json_string->c_ptr());
-  }
-};
-
-
-Histogram_builder *Histogram_json_hb::create_builder(Field *col, uint col_len,
-                                                     ha_rows rows)
-{
-  return new Histogram_json_builder(col, col_len, rows);
-}
 Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type,
                                  THD *owner)
 {
sql/sql_statistics.h
@@ -162,11 +162,18 @@ public:
 
   virtual uint get_width()=0;
 
-  virtual Histogram_builder *create_builder(Field *col, uint col_len,
-                                            ha_rows rows)=0;
+  /*
+    The creation-time workflow is:
+     * create a histogram
+     * init_for_collection()
+     * create_builder()
+     * feed the data to the builder
+     * serialize();
+  */
   virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                                    ulonglong size)=0;
+  virtual Histogram_builder *create_builder(Field *col, uint col_len,
+                                            ha_rows rows)=0;
 
   virtual bool is_available()=0;
sql/sql_statistics.h
@@ -177,19 +184,26 @@ public:
   virtual double range_selectivity(Field *field, key_range *min_endp,
                                    key_range *max_endp)=0;
 
-  // Legacy: return the size of the histogram on disk.
-  // This will be stored in mysql.column_stats.hist_size column.
-  // Newer, JSON-based histograms may return 0.
+  /*
+    Legacy: return the size of the histogram on disk.
+
+    This will be stored in mysql.column_stats.hist_size column.
+    The value is not really needed as one can look at
+    LENGTH(mysql.column_stats.histogram) directly.
+  */
   virtual uint get_size()=0;
   virtual ~Histogram_base()= default;
 
   Histogram_base() : owner(NULL) {}
 
+  /*
+    Memory management: a histogram may be (exclusively) "owned" by a particular
+    thread (done for histograms that are being collected). By default, a
+    histogram has owner==NULL and is not owned by any particular thread.
+  */
   THD *get_owner() { return owner; }
   void set_owner(THD *thd) { owner=thd; }
 private:
-  // Owner is a thread that *exclusively* owns this histogram (and so can
-  // delete it at any time)
   THD *owner;
 };
 
sql/sql_statistics.h
@@ -353,75 +367,72 @@ public:
 
 
-/*
-  An equi-height histogram which stores real values for bucket bounds.
-
-  Handles @@histogram_type=JSON_HB
-*/
-
-class Histogram_json_hb : public Histogram_base
-{
-  private:
-  size_t size; /* Number of elements in the histogram */
-
-  /* Collection-time only: collected histogram in the JSON form. */
-  std::string json_text;
-
-  // Array of histogram bucket endpoints in KeyTupleFormat.
-  std::vector<std::string> histogram_bounds;
-
-public:
-  static constexpr const char* JSON_NAME="histogram_hb_v1";
-
-  bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
-             const char *hist_data, size_t hist_data_len) override;
-
-  void serialize(Field *field) override;
-
-  Histogram_builder *create_builder(Field *col, uint col_len,
-                                    ha_rows rows) override;
-
-  // returns number of buckets in the histogram
-  uint get_width() override
-  {
-    return (uint)size;
-  }
-
-  Histogram_type get_type() override
-  {
-    return JSON_HB;
-  }
-
-  void set_json_text(ulonglong sz, uchar *json_text_arg)
-  {
-    size = (uint8) sz;
-    json_text.assign((const char*)json_text_arg,
-                     strlen((const char*)json_text_arg));
-  }
-
-  uint get_size() override
-  {
-    return size;
-  }
-
-  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
-                           ulonglong size) override;
-
-  bool is_available() override {return true; }
-
-  bool is_usable(THD *thd) override
-  {
-    return thd->variables.optimizer_use_condition_selectivity > 3 &&
-           is_available();
-  }
-
-  double point_selectivity(Field *field, key_range *endpoint,
-                           double avg_selection) override;
-  double range_selectivity(Field *field, key_range *min_endp,
-                           key_range *max_endp) override;
-private:
-  int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
-};
+/*
+  This is used to collect the the basic statistics from a Unique object:
+   - count of values
+   - count of distinct values
+   - count of distinct values that have occurred only once
+*/
+
+class Basic_stats_collector
+{
+  ulonglong count;          /* number of values retrieved           */
+  ulonglong count_distinct; /* number of distinct values retrieved  */
+  /* number of distinct values that occured only once */
+  ulonglong count_distinct_single_occurence;
+
+public:
+  Basic_stats_collector()
+  {
+    count= 0;
+    count_distinct= 0;
+    count_distinct_single_occurence= 0;
+  }
+
+  ulonglong get_count_distinct() const { return count_distinct; }
+  ulonglong get_count_single_occurence() const
+  {
+    return count_distinct_single_occurence;
+  }
+  ulonglong get_count() const { return count; }
+
+  void next(void *elem, element_count elem_cnt)
+  {
+    count_distinct++;
+    if (elem_cnt == 1)
+      count_distinct_single_occurence++;
+    count+= elem_cnt;
+  }
+};
+
+/*
+  Histogram_builder is a helper class that is used to build histograms
+  for columns.
+
+  Do not create directly, call Histogram->get_builder(...);
+*/
+
+class Histogram_builder
+{
+protected:
+  Field *column;           /* table field for which the histogram is built */
+  uint col_length;         /* size of this field                           */
+  ha_rows records;         /* number of records the histogram is built for */
+
+  Histogram_builder(Field *col, uint col_len, ha_rows rows) :
+    column(col), col_length(col_len), records(rows)
+  {}
+
+public:
+  // A histogram builder will also collect the counters
+  Basic_stats_collector counters;
+
+  virtual int next(void *elem, element_count elem_cnt)=0;
+  virtual void finalize()=0;
+  virtual ~Histogram_builder(){}
+};
 
 
 class Columns_statistics;
 class Index_statistics;
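Editor's note (not part of the patch): Basic_stats_collector, moved into this header above, receives one next(elem, elem_cnt) call per distinct value and derives its three counters from that stream. A self-contained sketch with made-up counts:

// Editor's illustration only -- toy values, not MariaDB code.
#include <cstdio>

int main()
{
  unsigned long long counts[]= {3, 1, 5, 1};   // one entry per distinct value
  unsigned long long count= 0, count_distinct= 0, single= 0;

  for (unsigned long long c : counts)
  {
    count_distinct++;      // next() is called once per distinct value
    if (c == 1)
      single++;            // values that occurred exactly once
    count+= c;             // total number of rows
  }

  printf("count=%llu distinct=%llu single_occurrence=%llu\n",
         count, count_distinct, single);  // count=10 distinct=4 single_occurrence=2
  return 0;
}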