mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
rough base for json histogram builder
Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
This commit is contained in:
committed by
Sergei Petrunia
parent
567552b410
commit
237447de63
Binary file not shown.
@ -25,4 +25,10 @@ DESCRIBE mysql.column_stats;
|
|||||||
SELECT * FROM mysql.column_stats;
|
SELECT * FROM mysql.column_stats;
|
||||||
|
|
||||||
set histogram_type=@save_histogram_type;
|
set histogram_type=@save_histogram_type;
|
||||||
|
|
||||||
|
## Remove against Milestone-2
|
||||||
|
ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
|
||||||
|
SELECT * FROM mysql.column_stats;
|
||||||
|
select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats;
|
||||||
|
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
@ -32,6 +32,9 @@
|
|||||||
#include "uniques.h"
|
#include "uniques.h"
|
||||||
#include "sql_show.h"
|
#include "sql_show.h"
|
||||||
#include "sql_partition.h"
|
#include "sql_partition.h"
|
||||||
|
#include "my_json_writer.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The system variable 'use_stat_tables' can take one of the
|
The system variable 'use_stat_tables' can take one of the
|
||||||
@ -1070,13 +1073,8 @@ public:
|
|||||||
stat_field->store(stats->histogram.get_type() + 1);
|
stat_field->store(stats->histogram.get_type() + 1);
|
||||||
break;
|
break;
|
||||||
case COLUMN_STAT_HISTOGRAM:
|
case COLUMN_STAT_HISTOGRAM:
|
||||||
if (stats->histogram.get_type() == JSON) {
|
|
||||||
const char* val = "{'hello': 'world'}";
|
|
||||||
stat_field->store(val, strlen(val), &my_charset_bin);
|
|
||||||
} else {
|
|
||||||
stat_field->store((char *) stats->histogram.get_values(),
|
stat_field->store((char *) stats->histogram.get_values(),
|
||||||
stats->histogram.get_size(), &my_charset_bin);
|
stats->histogram.get_size(), &my_charset_bin);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1524,6 +1522,7 @@ public:
|
|||||||
|
|
||||||
class Histogram_builder
|
class Histogram_builder
|
||||||
{
|
{
|
||||||
|
protected:
|
||||||
Field *column; /* table field for which the histogram is built */
|
Field *column; /* table field for which the histogram is built */
|
||||||
uint col_length; /* size of this field */
|
uint col_length; /* size of this field */
|
||||||
ha_rows records; /* number of records the histogram is built for */
|
ha_rows records; /* number of records the histogram is built for */
|
||||||
@ -1554,13 +1553,15 @@ public:
|
|||||||
count_distinct_single_occurence= 0;
|
count_distinct_single_occurence= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual ~Histogram_builder() = default;
|
||||||
|
|
||||||
ulonglong get_count_distinct() const { return count_distinct; }
|
ulonglong get_count_distinct() const { return count_distinct; }
|
||||||
ulonglong get_count_single_occurence() const
|
ulonglong get_count_single_occurence() const
|
||||||
{
|
{
|
||||||
return count_distinct_single_occurence;
|
return count_distinct_single_occurence;
|
||||||
}
|
}
|
||||||
|
|
||||||
int next(void *elem, element_count elem_cnt)
|
virtual int next(void *elem, element_count elem_cnt)
|
||||||
{
|
{
|
||||||
count_distinct++;
|
count_distinct++;
|
||||||
if (elem_cnt == 1)
|
if (elem_cnt == 1)
|
||||||
@ -1572,7 +1573,7 @@ public:
|
|||||||
{
|
{
|
||||||
column->store_field_value((uchar *) elem, col_length);
|
column->store_field_value((uchar *) elem, col_length);
|
||||||
histogram->set_value(curr_bucket,
|
histogram->set_value(curr_bucket,
|
||||||
column->pos_in_interval(min_value, max_value));
|
column->pos_in_interval(min_value, max_value));
|
||||||
curr_bucket++;
|
curr_bucket++;
|
||||||
while (curr_bucket != hist_width &&
|
while (curr_bucket != hist_width &&
|
||||||
count > bucket_capacity * (curr_bucket + 1))
|
count > bucket_capacity * (curr_bucket + 1))
|
||||||
@ -1585,6 +1586,57 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class Histogram_builder_json : public Histogram_builder
|
||||||
|
{
|
||||||
|
std::vector<String> bucket_bounds;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
|
||||||
|
: Histogram_builder(col, col_len, rows)
|
||||||
|
{
|
||||||
|
Column_statistics *col_stats= col->collected_stats;
|
||||||
|
min_value= col_stats->min_value;
|
||||||
|
max_value= col_stats->max_value;
|
||||||
|
histogram= &col_stats->histogram;
|
||||||
|
hist_width= histogram->get_width();
|
||||||
|
bucket_capacity= (double) records / (hist_width + 1);
|
||||||
|
curr_bucket= 0;
|
||||||
|
count= 0;
|
||||||
|
count_distinct= 0;
|
||||||
|
count_distinct_single_occurence= 0;
|
||||||
|
bucket_bounds = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
~Histogram_builder_json() override = default;
|
||||||
|
|
||||||
|
int next(void *elem, element_count elem_cnt) override
|
||||||
|
{
|
||||||
|
count_distinct++;
|
||||||
|
if (elem_cnt == 1)
|
||||||
|
count_distinct_single_occurence++;
|
||||||
|
count+= elem_cnt;
|
||||||
|
if (curr_bucket == hist_width)
|
||||||
|
return 0;
|
||||||
|
if (count > bucket_capacity * (curr_bucket + 1))
|
||||||
|
{
|
||||||
|
auto *val= new StringBuffer<MAX_FIELD_WIDTH>;
|
||||||
|
column->val_str(val);
|
||||||
|
bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin));
|
||||||
|
curr_bucket++;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void build() {
|
||||||
|
Json_writer *writer = new Json_writer();
|
||||||
|
writer->start_array();
|
||||||
|
for(auto& value: bucket_bounds) {
|
||||||
|
writer->add_str(value);
|
||||||
|
}
|
||||||
|
writer->end_array();
|
||||||
|
histogram->set_values((uchar *) writer->output.get_string()->ptr());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
C_MODE_START
|
C_MODE_START
|
||||||
|
|
||||||
@ -1594,6 +1646,12 @@ int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
|
|||||||
return hist_builder->next(elem, elem_cnt);
|
return hist_builder->next(elem, elem_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
|
||||||
|
{
|
||||||
|
Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg;
|
||||||
|
return hist_builder->next(elem, elem_cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static int count_distinct_single_occurence_walk(void *elem,
|
static int count_distinct_single_occurence_walk(void *elem,
|
||||||
@ -1699,10 +1757,22 @@ public:
|
|||||||
*/
|
*/
|
||||||
void walk_tree_with_histogram(ha_rows rows)
|
void walk_tree_with_histogram(ha_rows rows)
|
||||||
{
|
{
|
||||||
Histogram_builder hist_builder(table_field, tree_key_length, rows);
|
if(table_field->collected_stats->histogram.get_type() == JSON)
|
||||||
tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder);
|
{
|
||||||
distincts= hist_builder.get_count_distinct();
|
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
|
||||||
distincts_single_occurence= hist_builder.get_count_single_occurence();
|
tree->walk(table_field->table, json_histogram_build_walk,
|
||||||
|
(void *) &hist_builder);
|
||||||
|
hist_builder.build();
|
||||||
|
distincts= hist_builder.get_count_distinct();
|
||||||
|
distincts_single_occurence= hist_builder.get_count_single_occurence();
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
Histogram_builder hist_builder(table_field, tree_key_length, rows);
|
||||||
|
tree->walk(table_field->table, histogram_build_walk,
|
||||||
|
(void *) &hist_builder);
|
||||||
|
distincts= hist_builder.get_count_distinct();
|
||||||
|
distincts_single_occurence= hist_builder.get_count_single_occurence();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ulonglong get_count_distinct()
|
ulonglong get_count_distinct()
|
||||||
|
Reference in New Issue
Block a user