From e10d99ce8719fb10100d44022a59492088160d2f Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Sat, 21 Aug 2021 00:50:55 +0100 Subject: [PATCH] Backfill json histogram bounds during building Signed-off-by: Michael Okoko --- mysql-test/main/st_play.result | 38 ++++ mysql-test/main/st_play.test | 21 ++ mysql-test/main/statistics_fetch.result | 278 +++++++++++++++++++++++- mysql-test/main/statistics_fetch.test | 17 +- mysql-test/main/statistics_json.result | 159 +++++++++++--- sql/sql_statistics.cc | 10 +- 6 files changed, 480 insertions(+), 43 deletions(-) create mode 100644 mysql-test/main/st_play.result create mode 100644 mysql-test/main/st_play.test diff --git a/mysql-test/main/st_play.result b/mysql-test/main/st_play.result new file mode 100644 index 00000000000..9a1da440426 --- /dev/null +++ b/mysql-test/main/st_play.result @@ -0,0 +1,38 @@ +create table users ( +city varchar(100) +); +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; +analyze table users persistent for all; +Table Op Msg_type Msg_text +test.users analyze status Engine-independent statistics collected +test.users analyze status OK +select hex(histogram) from mysql.column_stats where table_name='users'; +hex(histogram) +00000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +explain extended select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 97.66 Using where +Warnings: +Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow' +analyze select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 97.66 98.02 Using where +delete from mysql.column_stats where table_name='users'; +set histogram_type=json; +set histogram_size=10; +analyze table users persistent for all; +Table Op Msg_type Msg_text +test.users analyze status Engine-independent statistics collected +test.users analyze status Table is already up to date +select histogram from mysql.column_stats where table_name='users'; +histogram +[] +explain extended select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 50.00 Using where +Warnings: +Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow' +analyze select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where diff --git a/mysql-test/main/st_play.test b/mysql-test/main/st_play.test new file mode 100644 index 00000000000..26aff931668 --- /dev/null +++ b/mysql-test/main/st_play.test @@ -0,0 +1,21 @@ +--source include/have_sequence.inc +create table users ( + city varchar(100) +); +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; + +analyze table users persistent for all; +select hex(histogram) from mysql.column_stats where table_name='users'; +explain extended select * from users where city = 'Moscow'; +analyze select * from users where city = 'Moscow'; + +delete from mysql.column_stats where table_name='users'; + +set histogram_type=json; +set histogram_size=10; + +analyze table users persistent for all; +select histogram from mysql.column_stats where table_name='users'; +explain extended select * from users where city = 'Moscow'; +analyze select * from users where city = 'Moscow'; diff --git a/mysql-test/main/statistics_fetch.result b/mysql-test/main/statistics_fetch.result index 2c79ab44561..5f60cbc7275 100644 --- a/mysql-test/main/statistics_fetch.result +++ b/mysql-test/main/statistics_fetch.result @@ -8,22 +8,22 @@ create table ten(a int primary key); insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1_bin (a varchar(255)); insert into t1_bin select concat('a-', a) from ten; -set histogram_size=10; +set histogram_size=100; analyze table t1_bin persistent for all; Table Op Msg_type Msg_text test.t1_bin analyze status Engine-independent statistics collected test.t1_bin analyze status OK select hex(histogram) from mysql.column_stats where table_name='t1_bin'; hex(histogram) -711C5555388EAAAA8DE3 +00000000000000000000711C711C711C711C711CE338E338E338E338E33855555555555555555555C671C671C671C671C671388E388E388E388E388EAAAAAAAAAAAAAAAAAAAA1BC71BC71BC71BC71BC78DE38DE38DE38DE38DE3FFFFFFFFFFFFFFFFFFFF explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 50.00 Using where +1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 58.82 Using where Warnings: Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t1_bin`.`a` between 'a-3a' and 'zzzzzzzzz' analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 50.00 60.00 Using where +1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where create table t1_json (a varchar(255)); insert into t1_json select concat('a-', a) from ten; set histogram_type=json; @@ -33,26 +33,116 @@ test.t1_json analyze status Engine-independent statistics collected test.t1_json analyze status OK select * from mysql.column_stats where table_name='t1_json'; db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram -test t1_json a a-0 a-9 0.0000 3.0000 1.0000 10 JSON [ +test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [ + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", "a-0", "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", "a-2", "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", "a-4", "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", "a-6", "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", "a-9" ] explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 68.71 Using where +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 60.87 Using where Warnings: Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz' analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 68.71 60.00 Using where +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 60.87 60.00 Using where create table t2_bin(a int); insert into t2_bin select a*10 from ten; set histogram_type=@save_histogram_type; @@ -62,12 +152,12 @@ test.t2_bin analyze status Engine-independent statistics collected test.t2_bin analyze status OK explain extended select * from t2_bin where a between '44' and '55'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 33.33 Using where +1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 11.76 Using where Warnings: Note 1003 select `test`.`t2_bin`.`a` AS `a` from `test`.`t2_bin` where `test`.`t2_bin`.`a` between '44' and '55' analyze select * from t2_bin where a between '44' and '55'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 10.00 33.33 10.00 Using where +1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 10.00 11.76 10.00 Using where create table t2_json(a int); insert into t2_json select a*10 from ten; set histogram_type=json; @@ -77,27 +167,191 @@ test.t2_json analyze status Engine-independent statistics collected test.t2_json analyze status OK select * from mysql.column_stats where table_name='t2_json'; db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram -test t2_json a 0 90 0.0000 4.0000 1.0000 10 JSON [ +test t2_json a 0 90 0.0000 4.0000 1.0000 100 JSON [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", "0", "10", + "10", + "10", + "10", + "10", + "10", + "10", + "10", + "10", + "10", + "20", + "20", + "20", + "20", + "20", + "20", + "20", + "20", + "20", "20", "30", + "30", + "30", + "30", + "30", + "30", + "30", + "30", + "30", + "30", + "40", + "40", + "40", + "40", + "40", + "40", + "40", + "40", + "40", "40", "50", + "50", + "50", + "50", + "50", + "50", + "50", + "50", + "50", + "50", + "60", + "60", + "60", + "60", + "60", + "60", + "60", + "60", + "60", "60", "70", + "70", + "70", + "70", + "70", + "70", + "70", + "70", + "70", + "70", "80", + "80", + "80", + "80", + "80", + "80", + "80", + "80", + "80", + "80", + "90", + "90", + "90", + "90", + "90", + "90", + "90", + "90", + "90", "90" ] explain extended select * from t2_json where a between '44' and '55'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 11.00 Using where +1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.10 Using where Warnings: Note 1003 select `test`.`t2_json`.`a` AS `a` from `test`.`t2_json` where `test`.`t2_json`.`a` between '44' and '55' analyze select * from t2_json where a between '44' and '55'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.00 11.00 10.00 Using where +1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.00 10.10 10.00 Using where +create table users ( +city varchar(100) +); +set histogram_size=50; +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; +set histogram_type=json; +analyze table users persistent for all; +Table Op Msg_type Msg_text +test.users analyze status Engine-independent statistics collected +test.users analyze status OK +select histogram from mysql.column_stats where table_name='users'; +histogram +[ + "Helsinki", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow", + "Moscow" +] +explain extended select * from users where city <= 'Moscow'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 100.00 Using where +Warnings: +Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` <= 'Moscow' +analyze select * from users where city <= 'Moscow'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 100.00 100.00 Using where drop table t1_bin; drop table t1_json; drop table t2_bin; drop table t2_json; +drop table users; diff --git a/mysql-test/main/statistics_fetch.test b/mysql-test/main/statistics_fetch.test index d0313fe5f2b..bad5918abc1 100644 --- a/mysql-test/main/statistics_fetch.test +++ b/mysql-test/main/statistics_fetch.test @@ -12,7 +12,7 @@ insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1_bin (a varchar(255)); insert into t1_bin select concat('a-', a) from ten; -set histogram_size=10; +set histogram_size=100; analyze table t1_bin persistent for all; select hex(histogram) from mysql.column_stats where table_name='t1_bin'; explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; @@ -42,8 +42,23 @@ select * from mysql.column_stats where table_name='t2_json'; explain extended select * from t2_json where a between '44' and '55'; analyze select * from t2_json where a between '44' and '55'; +--source include/have_sequence.inc +create table users ( + city varchar(100) +); +set histogram_size=50; +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; +set histogram_type=json; +analyze table users persistent for all; +select histogram from mysql.column_stats where table_name='users'; +explain extended select * from users where city <= 'Moscow'; +analyze select * from users where city <= 'Moscow'; + + drop table t1_bin; drop table t1_json; drop table t2_bin; drop table t2_json; +drop table users; diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 6a8df8f0f76..82d6e3ea9a9 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -95,38 +95,139 @@ test t1 a 0 49 0.0000 4.0000 1.0000 25 JSON [ "44", "47" ] -test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 17.1250 6.4000 5 JSON [ +test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 17.1250 6.4000 25 JSON [ + "vvvvvvvvvvvvv", + "vvvvvvvvvvvvv", + "vvvvvvvvvvvvv", + "vvvvvvvvvvvvv", + "vvvvvvvvvvvvv", "vvvvvvvvvvvvv", "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "wwwwwwwwwwwwwwwwwwwwwwwwwwww", + "xxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxxxxxxxxxxxxx", "yyy", + "yyy", + "yyy", + "yyy", + "yyy", + "zzzzzzzzzzzzzzzzzz", + "zzzzzzzzzzzzzzzzzz", + "zzzzzzzzzzzzzzzzzz", + "zzzzzzzzzzzzzzzzzz", "zzzzzzzzzzzzzzzzzz" ] -test t1 c aaaa dddddddd 0.1250 6.6571 7.0000 5 JSON [ +test t1 c aaaa dddddddd 0.1250 6.6571 7.0000 25 JSON [ + "aaaa", + "aaaa", + "aaaa", + "aaaa", + "aaaa", "aaaa", "bbb", "bbbbbb", + "bbbbbb", + "bbbbbb", + "bbbbbb", + "bbbbbb", "ccccccccc", + "ccccccccc", + "ccccccccc", + "ccccccccc", + "ccccccccc", + "ccccccccc", + "dddddddd", + "dddddddd", + "dddddddd", + "dddddddd", + "dddddddd", + "dddddddd", "dddddddd" ] -test t1 d 1989-03-12 1999-07-23 0.1500 3.0000 8.5000 4 JSON [ +test t1 d 1989-03-12 1999-07-23 0.1500 3.0000 8.5000 25 JSON [ + "1989-03-12", + "1989-03-12", + "1989-03-12", + "1989-03-12", + "1989-03-12", + "1989-03-12", + "1989-03-12", "1989-03-12", "1990-05-15", - "1998-08-28", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1990-05-15", + "1999-07-23", + "1999-07-23", + "1999-07-23", + "1999-07-23", + "1999-07-23", "1999-07-23" ] -test t1 e 0.01 0.112 0.2250 8.0000 6.2000 5 JSON [ +test t1 e 0.01 0.112 0.2250 8.0000 6.2000 25 JSON [ + "0.01", + "0.01", + "0.01", + "0.01", + "0.01", + "0.01", + "0.01", + "0.01", + "0.01", "0.01", "0.012", "0.05", + "0.05", + "0.05", + "0.05", "0.1", + "0.1", + "0.1", + "0.1", + "0.1", + "0.1", + "0.1", + "0.112", + "0.112", "0.112" ] -test t1 f 1 5 0.2000 1.0000 6.4000 5 JSON [ +test t1 f 1 5 0.2000 1.0000 6.4000 25 JSON [ + "", + "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "", "", + "", + "", + "", + "", + "", + "", + "", + "", + "", "" ] SELECT * FROM mysql.index_stats; @@ -528,14 +629,14 @@ Country ABW ZWE 50 JSON [ "ARM", "BHS", "BRA", - "BRB", - "BRN", + "BRA", + "BRA", "CAN", "CHN", - "CIV", - "CMR", - "COD", - "COG", + "CHN", + "CHN", + "CHN", + "CHN", "COL", "DEU", "DZA", @@ -543,20 +644,20 @@ Country ABW ZWE 50 JSON [ "FRA", "GBR", "IDN", + "IDN", + "IND", + "IND", + "IND", "IND", - "IRL", "IRN", - "IRQ", - "ISL", - "ISR", "ITA", "JPN", - "KAZ", - "KEN", + "JPN", + "JPN", "KOR", "LKA", "MEX", - "MHL", + "MEX", "MMR", "NGA", "NZL", @@ -565,15 +666,15 @@ Country ABW ZWE 50 JSON [ "POL", "QAT", "RUS", - "RWA", + "RUS", "SAU", "TCD", "TUR", "UKR", "USA", - "UZB", - "VAT", - "VCT", + "USA", + "USA", + "USA", "VNM" ] Population 42 10500000 50 JSON [ @@ -694,10 +795,10 @@ Language Abhyasi [South]Mande 50 JSON [ "Danish", "Embera", "English", - "Eskimo Languages", - "Estonian", + "English", + "English", + "French", "French", - "Fries", "Futuna", "German", "Greek", @@ -724,7 +825,7 @@ Language Abhyasi [South]Mande 50 JSON [ "Shona", "Songhai", "Spanish", - "Sranantonga", + "Spanish", "Tamashek", "Thai", "Tswana", @@ -733,6 +834,8 @@ Language Abhyasi [South]Mande 50 JSON [ "Wolea" ] Percentage 0.0 99.9 50 JSON [ + "0.0", + "0.0", "0.0", "0.1", "0.2", @@ -742,8 +845,6 @@ Percentage 0.0 99.9 50 JSON [ "0.6", "0.7", "0.8", - "0.9", - "1.0", "1.1", "1.3", "1.4", diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 5ab90a7587c..de9beaf7e32 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1988,8 +1988,16 @@ public: column->store_field_value((uchar *) elem, col_length); StringBuffer val; column->val_str(&val); - bucket_bounds.emplace_back(val.c_ptr()); + auto it = bucket_bounds.begin(); + bucket_bounds.insert(it+curr_bucket, val.c_ptr()); curr_bucket++; + while (curr_bucket != hist_width && + count > bucket_capacity * (curr_bucket + 1)) + { + auto it = bucket_bounds.begin(); + bucket_bounds.insert(it+curr_bucket, bucket_bounds[curr_bucket-1]); + curr_bucket++; + } } return 0; }