diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 3e7d3a307fa..3e7a9a43634 100644 Binary files a/mysql-test/main/statistics_json.result and b/mysql-test/main/statistics_json.result differ diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 0af7d1f6d1d..9ffb27b621d 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -25,4 +25,10 @@ DESCRIBE mysql.column_stats; SELECT * FROM mysql.column_stats; set histogram_type=@save_histogram_type; + +## Remove against Milestone-2 +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES(); +SELECT * FROM mysql.column_stats; +select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats; + DROP TABLE t1; \ No newline at end of file diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 16e3e8b1664..d2cad99f130 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -32,6 +32,9 @@ #include "uniques.h" #include "sql_show.h" #include "sql_partition.h" +#include "my_json_writer.h" + +#include /* The system variable 'use_stat_tables' can take one of the @@ -1070,13 +1073,8 @@ public: stat_field->store(stats->histogram.get_type() + 1); break; case COLUMN_STAT_HISTOGRAM: - if (stats->histogram.get_type() == JSON) { - const char* val = "{'hello': 'world'}"; - stat_field->store(val, strlen(val), &my_charset_bin); - } else { stat_field->store((char *) stats->histogram.get_values(), stats->histogram.get_size(), &my_charset_bin); - } break; } } @@ -1524,6 +1522,7 @@ public: class Histogram_builder { +protected: Field *column; /* table field for which the histogram is built */ uint col_length; /* size of this field */ ha_rows records; /* number of records the histogram is built for */ @@ -1554,13 +1553,15 @@ public: count_distinct_single_occurence= 0; } + virtual ~Histogram_builder() = default; + ulonglong get_count_distinct() const { return count_distinct; } ulonglong get_count_single_occurence() const { return count_distinct_single_occurence; } - int next(void *elem, element_count elem_cnt) + virtual int next(void *elem, element_count elem_cnt) { count_distinct++; if (elem_cnt == 1) @@ -1572,7 +1573,7 @@ public: { column->store_field_value((uchar *) elem, col_length); histogram->set_value(curr_bucket, - column->pos_in_interval(min_value, max_value)); + column->pos_in_interval(min_value, max_value)); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) @@ -1585,6 +1586,57 @@ public: } }; +class Histogram_builder_json : public Histogram_builder +{ +std::vector bucket_bounds; + +public: + Histogram_builder_json(Field *col, uint col_len, ha_rows rows) + : Histogram_builder(col, col_len, rows) + { + Column_statistics *col_stats= col->collected_stats; + min_value= col_stats->min_value; + max_value= col_stats->max_value; + histogram= &col_stats->histogram; + hist_width= histogram->get_width(); + bucket_capacity= (double) records / (hist_width + 1); + curr_bucket= 0; + count= 0; + count_distinct= 0; + count_distinct_single_occurence= 0; + bucket_bounds = {}; + } + + ~Histogram_builder_json() override = default; + + int next(void *elem, element_count elem_cnt) override + { + count_distinct++; + if (elem_cnt == 1) + count_distinct_single_occurence++; + count+= elem_cnt; + if (curr_bucket == hist_width) + return 0; + if (count > bucket_capacity * (curr_bucket + 1)) + { + auto *val= new StringBuffer; + column->val_str(val); + bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin)); + curr_bucket++; + } + return 0; + } + + void build() { + Json_writer *writer = new Json_writer(); + writer->start_array(); + for(auto& value: bucket_bounds) { + writer->add_str(value); + } + writer->end_array(); + histogram->set_values((uchar *) writer->output.get_string()->ptr()); + } +}; C_MODE_START @@ -1594,6 +1646,12 @@ int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) return hist_builder->next(elem, elem_cnt); } +int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +{ + Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg; + return hist_builder->next(elem, elem_cnt); +} + static int count_distinct_single_occurence_walk(void *elem, @@ -1699,10 +1757,22 @@ public: */ void walk_tree_with_histogram(ha_rows rows) { - Histogram_builder hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); + if(table_field->collected_stats->histogram.get_type() == JSON) + { + Histogram_builder_json hist_builder(table_field, tree_key_length, rows); + tree->walk(table_field->table, json_histogram_build_walk, + (void *) &hist_builder); + hist_builder.build(); + distincts= hist_builder.get_count_distinct(); + distincts_single_occurence= hist_builder.get_count_single_occurence(); + } else + { + Histogram_builder hist_builder(table_field, tree_key_length, rows); + tree->walk(table_field->table, histogram_build_walk, + (void *) &hist_builder); + distincts= hist_builder.get_count_distinct(); + distincts_single_occurence= hist_builder.get_count_single_occurence(); + } } ulonglong get_count_distinct()