From 237447de63c281e36e820bfef43257cef91bee54 Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Thu, 24 Jun 2021 07:41:09 +0100 Subject: [PATCH] rough base for json histogram builder Signed-off-by: Michael Okoko --- mysql-test/main/statistics_json.result | Bin 1365 -> 3493 bytes mysql-test/main/statistics_json.test | 6 ++ sql/sql_statistics.cc | 92 ++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 11 deletions(-) diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 3e7d3a307fa02f184d2c36efc35e5a060634c35f..3e7a9a43634820375c226f93045309fec268058c 100644 GIT binary patch literal 3493 zcmeHK&2HO95Ec>y!S2;PG&v38gUEt4EGJG82nH-m>qNCg8YG=W2?}B*u4Mw`%Hb{* zsh2)NpQ88PgWUTFeT2M4yStPnI+2rHf*?P0;Lmb)=jWUGcDac+A!I^=JfjkR4LlbT z0TiY*O=37pgq)0Xnt`03GoU<%mkdUfgBs6SsV`)n@Ueh|LpB#L(yh@XEixVnNu|J> zIL;^MAn7n=kemXWsWk=2MiUDrVPDWGiHB^~KnPQ$Dbn#8N@KB^!ux zn$CY`NMlk6MEE%2?B|5X?3}5K%B23HkV!@Y6n-fiNfuu<-8;e^614s90a*ii6CWll zjZvteL~fC$=!nfxhn>o$`qHY_+c#&E4pzA*Dk?7bb z?dZ`#-KlrH6V)}90aNRj{9d*j8m5zBb@fF<`~-)-*Ix_v1(X%Us=%!Bzshnomz9+1 z$5m!l30OtA^wU%(QiVN4)gGFnE4yxnwzsyGpvHr(U2`vAcXsa~oe=aK1Khc^*d6EH zA^h84pIH7}$c^hYBY)ql{qyRNTJ7I!<-5M>_aATFG~@dl<$YuE`StA|o@f_0YBy?c ztfza|3@O)2cnl}k)4X@$mFNUKGDVn`3%^8j7P^e940x^~Bg5|&NqQ2br zXjMF)t0^T2fA_R-S=uIL#;7S`w^~-*T$v t_K#T=D;j$Xtdgp>o$q%|+22kc8fH)WcH;eB-)rj|cYN=7`Lol+{{wRFJh=b> delta 46 zcmZ1~eU)ni7pqXUdPZtaPQJR8f_iy=QBI0_?PLx%Eq<3E{{V#$M<*Xwg%U$+E&x#^ B4Lkq< diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 0af7d1f6d1d..9ffb27b621d 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -25,4 +25,10 @@ DESCRIBE mysql.column_stats; SELECT * FROM mysql.column_stats; set histogram_type=@save_histogram_type; + +## Remove against Milestone-2 +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES(); +SELECT * FROM mysql.column_stats; +select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats; + DROP TABLE t1; \ No newline at end of file diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 16e3e8b1664..d2cad99f130 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -32,6 +32,9 @@ #include "uniques.h" #include "sql_show.h" #include "sql_partition.h" +#include "my_json_writer.h" + +#include /* The system variable 'use_stat_tables' can take one of the @@ -1070,13 +1073,8 @@ public: stat_field->store(stats->histogram.get_type() + 1); break; case COLUMN_STAT_HISTOGRAM: - if (stats->histogram.get_type() == JSON) { - const char* val = "{'hello': 'world'}"; - stat_field->store(val, strlen(val), &my_charset_bin); - } else { stat_field->store((char *) stats->histogram.get_values(), stats->histogram.get_size(), &my_charset_bin); - } break; } } @@ -1524,6 +1522,7 @@ public: class Histogram_builder { +protected: Field *column; /* table field for which the histogram is built */ uint col_length; /* size of this field */ ha_rows records; /* number of records the histogram is built for */ @@ -1554,13 +1553,15 @@ public: count_distinct_single_occurence= 0; } + virtual ~Histogram_builder() = default; + ulonglong get_count_distinct() const { return count_distinct; } ulonglong get_count_single_occurence() const { return count_distinct_single_occurence; } - int next(void *elem, element_count elem_cnt) + virtual int next(void *elem, element_count elem_cnt) { count_distinct++; if (elem_cnt == 1) @@ -1572,7 +1573,7 @@ public: { column->store_field_value((uchar *) elem, col_length); histogram->set_value(curr_bucket, - column->pos_in_interval(min_value, max_value)); + column->pos_in_interval(min_value, max_value)); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) @@ -1585,6 +1586,57 @@ public: } }; +class Histogram_builder_json : public Histogram_builder +{ +std::vector bucket_bounds; + +public: + Histogram_builder_json(Field *col, uint col_len, ha_rows rows) + : Histogram_builder(col, col_len, rows) + { + Column_statistics *col_stats= col->collected_stats; + min_value= col_stats->min_value; + max_value= col_stats->max_value; + histogram= &col_stats->histogram; + hist_width= histogram->get_width(); + bucket_capacity= (double) records / (hist_width + 1); + curr_bucket= 0; + count= 0; + count_distinct= 0; + count_distinct_single_occurence= 0; + bucket_bounds = {}; + } + + ~Histogram_builder_json() override = default; + + int next(void *elem, element_count elem_cnt) override + { + count_distinct++; + if (elem_cnt == 1) + count_distinct_single_occurence++; + count+= elem_cnt; + if (curr_bucket == hist_width) + return 0; + if (count > bucket_capacity * (curr_bucket + 1)) + { + auto *val= new StringBuffer; + column->val_str(val); + bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin)); + curr_bucket++; + } + return 0; + } + + void build() { + Json_writer *writer = new Json_writer(); + writer->start_array(); + for(auto& value: bucket_bounds) { + writer->add_str(value); + } + writer->end_array(); + histogram->set_values((uchar *) writer->output.get_string()->ptr()); + } +}; C_MODE_START @@ -1594,6 +1646,12 @@ int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) return hist_builder->next(elem, elem_cnt); } +int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +{ + Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg; + return hist_builder->next(elem, elem_cnt); +} + static int count_distinct_single_occurence_walk(void *elem, @@ -1699,10 +1757,22 @@ public: */ void walk_tree_with_histogram(ha_rows rows) { - Histogram_builder hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); + if(table_field->collected_stats->histogram.get_type() == JSON) + { + Histogram_builder_json hist_builder(table_field, tree_key_length, rows); + tree->walk(table_field->table, json_histogram_build_walk, + (void *) &hist_builder); + hist_builder.build(); + distincts= hist_builder.get_count_distinct(); + distincts_single_occurence= hist_builder.get_count_single_occurence(); + } else + { + Histogram_builder hist_builder(table_field, tree_key_length, rows); + tree->walk(table_field->table, histogram_build_walk, + (void *) &hist_builder); + distincts= hist_builder.get_count_distinct(); + distincts_single_occurence= hist_builder.get_count_single_occurence(); + } } ulonglong get_count_distinct()