From bff65a813e32a985368b868db4ff03185f1995a7 Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Sat, 21 Aug 2021 09:17:23 +0100 Subject: [PATCH] Implement point selectivity for JSON histograms * Also merges tests relating to JSON statistics into one file Signed-off-by: Michael Okoko --- mysql-test/main/st_play.result | 38 --- mysql-test/main/st_play.test | 21 -- mysql-test/main/statistics_fetch.result | 357 ------------------------ mysql-test/main/statistics_fetch.test | 64 ----- mysql-test/main/statistics_json.result | 173 ++++++++++++ mysql-test/main/statistics_json.test | 50 +++- sql/sql_statistics.cc | 43 ++- sql/sql_statistics.h | 9 +- 8 files changed, 254 insertions(+), 501 deletions(-) delete mode 100644 mysql-test/main/st_play.result delete mode 100644 mysql-test/main/st_play.test delete mode 100644 mysql-test/main/statistics_fetch.result delete mode 100644 mysql-test/main/statistics_fetch.test diff --git a/mysql-test/main/st_play.result b/mysql-test/main/st_play.result deleted file mode 100644 index 9a1da440426..00000000000 --- a/mysql-test/main/st_play.result +++ /dev/null @@ -1,38 +0,0 @@ -create table users ( -city varchar(100) -); -insert into users select 'Moscow' from seq_1_to_99; -insert into users select 'Helsinki' from seq_1_to_2; -analyze table users persistent for all; -Table Op Msg_type Msg_text -test.users analyze status Engine-independent statistics collected -test.users analyze status OK -select hex(histogram) from mysql.column_stats where table_name='users'; -hex(histogram) -00000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -explain extended select * from users where city = 'Moscow'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 97.66 Using where -Warnings: -Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow' -analyze select * from users where city = 'Moscow'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 97.66 98.02 Using where -delete from mysql.column_stats where table_name='users'; -set histogram_type=json; -set histogram_size=10; -analyze table users persistent for all; -Table Op Msg_type Msg_text -test.users analyze status Engine-independent statistics collected -test.users analyze status Table is already up to date -select histogram from mysql.column_stats where table_name='users'; -histogram -[] -explain extended select * from users where city = 'Moscow'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 50.00 Using where -Warnings: -Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow' -analyze select * from users where city = 'Moscow'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where diff --git a/mysql-test/main/st_play.test b/mysql-test/main/st_play.test deleted file mode 100644 index 26aff931668..00000000000 --- a/mysql-test/main/st_play.test +++ /dev/null @@ -1,21 +0,0 @@ ---source include/have_sequence.inc -create table users ( - city varchar(100) -); -insert into users select 'Moscow' from seq_1_to_99; -insert into users select 'Helsinki' from seq_1_to_2; - -analyze table users persistent for all; -select hex(histogram) from mysql.column_stats where table_name='users'; -explain extended select * from users where city = 'Moscow'; -analyze select * from users where city = 'Moscow'; - -delete from mysql.column_stats where table_name='users'; - -set histogram_type=json; -set histogram_size=10; - -analyze table users persistent for all; -select histogram from mysql.column_stats where table_name='users'; -explain extended select * from users where city = 'Moscow'; -analyze select * from users where city = 'Moscow'; diff --git a/mysql-test/main/statistics_fetch.result b/mysql-test/main/statistics_fetch.result deleted file mode 100644 index 5f60cbc7275..00000000000 --- a/mysql-test/main/statistics_fetch.result +++ /dev/null @@ -1,357 +0,0 @@ -# -# Test to inspect the range_selectivity returned by Histogram_json and Histogram_binary -# todo: should be merged with statistics_json.test -# -set @save_histogram_type=@@histogram_type; -set @save_histogram_size=@@histogram_size; -create table ten(a int primary key); -insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t1_bin (a varchar(255)); -insert into t1_bin select concat('a-', a) from ten; -set histogram_size=100; -analyze table t1_bin persistent for all; -Table Op Msg_type Msg_text -test.t1_bin analyze status Engine-independent statistics collected -test.t1_bin analyze status OK -select hex(histogram) from mysql.column_stats where table_name='t1_bin'; -hex(histogram) -00000000000000000000711C711C711C711C711CE338E338E338E338E33855555555555555555555C671C671C671C671C671388E388E388E388E388EAAAAAAAAAAAAAAAAAAAA1BC71BC71BC71BC71BC78DE38DE38DE38DE38DE3FFFFFFFFFFFFFFFFFFFF -explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 58.82 Using where -Warnings: -Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t1_bin`.`a` between 'a-3a' and 'zzzzzzzzz' -analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where -create table t1_json (a varchar(255)); -insert into t1_json select concat('a-', a) from ten; -set histogram_type=json; -analyze table t1_json persistent for all; -Table Op Msg_type Msg_text -test.t1_json analyze status Engine-independent statistics collected -test.t1_json analyze status OK -select * from mysql.column_stats where table_name='t1_json'; -db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram -test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [ - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-0", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-1", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-2", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-3", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-4", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-5", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-6", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-7", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-8", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9", - "a-9" -] -explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 60.87 Using where -Warnings: -Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz' -analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 60.87 60.00 Using where -create table t2_bin(a int); -insert into t2_bin select a*10 from ten; -set histogram_type=@save_histogram_type; -analyze table t2_bin persistent for all; -Table Op Msg_type Msg_text -test.t2_bin analyze status Engine-independent statistics collected -test.t2_bin analyze status OK -explain extended select * from t2_bin where a between '44' and '55'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 11.76 Using where -Warnings: -Note 1003 select `test`.`t2_bin`.`a` AS `a` from `test`.`t2_bin` where `test`.`t2_bin`.`a` between '44' and '55' -analyze select * from t2_bin where a between '44' and '55'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t2_bin ALL NULL NULL NULL NULL 10 10.00 11.76 10.00 Using where -create table t2_json(a int); -insert into t2_json select a*10 from ten; -set histogram_type=json; -analyze table t2_json persistent for all; -Table Op Msg_type Msg_text -test.t2_json analyze status Engine-independent statistics collected -test.t2_json analyze status OK -select * from mysql.column_stats where table_name='t2_json'; -db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram -test t2_json a 0 90 0.0000 4.0000 1.0000 100 JSON [ - "0", - "0", - "0", - "0", - "0", - "0", - "0", - "0", - "0", - "0", - "10", - "10", - "10", - "10", - "10", - "10", - "10", - "10", - "10", - "10", - "20", - "20", - "20", - "20", - "20", - "20", - "20", - "20", - "20", - "20", - "30", - "30", - "30", - "30", - "30", - "30", - "30", - "30", - "30", - "30", - "40", - "40", - "40", - "40", - "40", - "40", - "40", - "40", - "40", - "40", - "50", - "50", - "50", - "50", - "50", - "50", - "50", - "50", - "50", - "50", - "60", - "60", - "60", - "60", - "60", - "60", - "60", - "60", - "60", - "60", - "70", - "70", - "70", - "70", - "70", - "70", - "70", - "70", - "70", - "70", - "80", - "80", - "80", - "80", - "80", - "80", - "80", - "80", - "80", - "80", - "90", - "90", - "90", - "90", - "90", - "90", - "90", - "90", - "90", - "90" -] -explain extended select * from t2_json where a between '44' and '55'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.10 Using where -Warnings: -Note 1003 select `test`.`t2_json`.`a` AS `a` from `test`.`t2_json` where `test`.`t2_json`.`a` between '44' and '55' -analyze select * from t2_json where a between '44' and '55'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t2_json ALL NULL NULL NULL NULL 10 10.00 10.10 10.00 Using where -create table users ( -city varchar(100) -); -set histogram_size=50; -insert into users select 'Moscow' from seq_1_to_99; -insert into users select 'Helsinki' from seq_1_to_2; -set histogram_type=json; -analyze table users persistent for all; -Table Op Msg_type Msg_text -test.users analyze status Engine-independent statistics collected -test.users analyze status OK -select histogram from mysql.column_stats where table_name='users'; -histogram -[ - "Helsinki", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow", - "Moscow" -] -explain extended select * from users where city <= 'Moscow'; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 100.00 Using where -Warnings: -Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` <= 'Moscow' -analyze select * from users where city <= 'Moscow'; -id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 100.00 100.00 Using where -drop table t1_bin; -drop table t1_json; -drop table t2_bin; -drop table t2_json; -drop table users; diff --git a/mysql-test/main/statistics_fetch.test b/mysql-test/main/statistics_fetch.test deleted file mode 100644 index bad5918abc1..00000000000 --- a/mysql-test/main/statistics_fetch.test +++ /dev/null @@ -1,64 +0,0 @@ ---echo # ---echo # Test to inspect the range_selectivity returned by Histogram_json and Histogram_binary ---echo # todo: should be merged with statistics_json.test ---echo # - -set @save_histogram_type=@@histogram_type; -set @save_histogram_size=@@histogram_size; - -create table ten(a int primary key); -insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); - -create table t1_bin (a varchar(255)); -insert into t1_bin select concat('a-', a) from ten; - -set histogram_size=100; -analyze table t1_bin persistent for all; -select hex(histogram) from mysql.column_stats where table_name='t1_bin'; -explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; -analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; - -create table t1_json (a varchar(255)); -insert into t1_json select concat('a-', a) from ten; -set histogram_type=json; -analyze table t1_json persistent for all; -select * from mysql.column_stats where table_name='t1_json'; -explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; -analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; - - -create table t2_bin(a int); -insert into t2_bin select a*10 from ten; -set histogram_type=@save_histogram_type; -analyze table t2_bin persistent for all; -explain extended select * from t2_bin where a between '44' and '55'; -analyze select * from t2_bin where a between '44' and '55'; - -create table t2_json(a int); -insert into t2_json select a*10 from ten; -set histogram_type=json; -analyze table t2_json persistent for all; -select * from mysql.column_stats where table_name='t2_json'; -explain extended select * from t2_json where a between '44' and '55'; -analyze select * from t2_json where a between '44' and '55'; - ---source include/have_sequence.inc -create table users ( - city varchar(100) -); -set histogram_size=50; -insert into users select 'Moscow' from seq_1_to_99; -insert into users select 'Helsinki' from seq_1_to_2; -set histogram_type=json; -analyze table users persistent for all; -select histogram from mysql.column_stats where table_name='users'; -explain extended select * from users where city <= 'Moscow'; -analyze select * from users where city <= 'Moscow'; - - -drop table t1_bin; -drop table t1_json; -drop table t2_bin; -drop table t2_json; -drop table users; - diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 82d6e3ea9a9..8c46e844862 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -4,6 +4,175 @@ drop table if exists t1; set @save_histogram_type=@@histogram_type; set @save_histogram_size=@@histogram_size; +create table ten(a int primary key); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1_bin (a varchar(255)); +insert into t1_bin select concat('a-', a) from ten; +set histogram_size=100; +analyze table t1_bin persistent for all; +Table Op Msg_type Msg_text +test.t1_bin analyze status Engine-independent statistics collected +test.t1_bin analyze status OK +select hex(histogram) from mysql.column_stats where table_name='t1_bin'; +hex(histogram) +00000000000000000000711C711C711C711C711CE338E338E338E338E33855555555555555555555C671C671C671C671C671388E388E388E388E388EAAAAAAAAAAAAAAAAAAAA1BC71BC71BC71BC71BC78DE38DE38DE38DE38DE3FFFFFFFFFFFFFFFFFFFF +explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 58.82 Using where +Warnings: +Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t1_bin`.`a` between 'a-3a' and 'zzzzzzzzz' +analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where +create table t1_json (a varchar(255)); +insert into t1_json select concat('a-', a) from ten; +set histogram_type=json; +analyze table t1_json persistent for all; +Table Op Msg_type Msg_text +test.t1_json analyze status Engine-independent statistics collected +test.t1_json analyze status OK +select * from mysql.column_stats where table_name='t1_json'; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [ + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-0", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-1", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-2", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-3", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-4", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-5", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-6", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-7", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-8", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9", + "a-9" +] +explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 60.87 Using where +Warnings: +Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz' +analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 60.87 60.00 Using where +create table users ( +city varchar(100) +); +set histogram_size=50; +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; +set histogram_type=json; +analyze table users persistent for all; +Table Op Msg_type Msg_text +test.users analyze status Engine-independent statistics collected +test.users analyze status OK +explain extended select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 98.04 Using where +Warnings: +Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Moscow' +analyze select * from users where city = 'Moscow'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 98.04 98.02 Using where +explain extended select * from users where city = 'Helsinki'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 50.00 Using where +Warnings: +Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Helsinki' +analyze select * from users where city = 'helsinki'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 50.00 1.98 Using where +drop table t1_bin; +drop table t1_json; +drop table users; CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY, b varchar(32), @@ -252,6 +421,10 @@ Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` A analyze select * from t1 where a between '20' and '70'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 ALL PRIMARY NULL NULL NULL 40 40.00 57.50 57.50 Using where +UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1'; +FLUSH TABLES; +explain extended select * from t1 where a between '20' and '70'; +ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'. DELETE FROM mysql.column_stats; DROP TABLE t1; create schema world; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 4a0f185c28b..5e2550fb2fb 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -11,6 +11,46 @@ drop table if exists t1; set @save_histogram_type=@@histogram_type; set @save_histogram_size=@@histogram_size; +create table ten(a int primary key); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t1_bin (a varchar(255)); +insert into t1_bin select concat('a-', a) from ten; + +set histogram_size=100; +analyze table t1_bin persistent for all; +select hex(histogram) from mysql.column_stats where table_name='t1_bin'; +explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; +analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz'; + +create table t1_json (a varchar(255)); +insert into t1_json select concat('a-', a) from ten; +set histogram_type=json; +analyze table t1_json persistent for all; +select * from mysql.column_stats where table_name='t1_json'; +explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; +analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; + +--source include/have_sequence.inc +create table users ( + city varchar(100) +); +set histogram_size=50; +insert into users select 'Moscow' from seq_1_to_99; +insert into users select 'Helsinki' from seq_1_to_2; +set histogram_type=json; +analyze table users persistent for all; +explain extended select * from users where city = 'Moscow'; +analyze select * from users where city = 'Moscow'; + +explain extended select * from users where city = 'Helsinki'; +analyze select * from users where city = 'helsinki'; + + +drop table t1_bin; +drop table t1_json; +drop table users; + CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY, b varchar(32), @@ -81,11 +121,11 @@ SELECT COUNT(*) FROM t1; explain extended select * from t1 where a between '20' and '70'; analyze select * from t1 where a between '20' and '70'; -# todo: test different valid JSON strings that are invalid histograms. -# UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1'; -# FLUSH TABLES; -# --error ER_JSON_HISTOGRAM_PARSE_FAILED -# explain extended select * from t1 where a between '20' and '70'; +# test different valid JSON strings that are invalid histograms. +UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1'; +FLUSH TABLES; +--error ER_JSON_HISTOGRAM_PARSE_FAILED +explain extended select * from t1 where a between '20' and '70'; DELETE FROM mysql.column_stats; DROP TABLE t1; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 0a87d2d0750..7487618978b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1452,10 +1452,37 @@ double pos_in_interval_through_strxfrm(Field *field, } -double Histogram_json::point_selectivity(Field *field, key_range *min_endp, - key_range *max_endp, double avg_sel) +double Histogram_json::point_selectivity(Field *field, key_range *endpoint, double avg_sel) { - return 0.5; + double sel; + store_key_image_to_rec(field, (uchar *) endpoint->key, + field->key_length()); + const uchar *min_key = endpoint->key; + if (field->real_maybe_null()) + min_key++; + uint min_idx= find_bucket(field, min_key); + + uint max_idx= min_idx; + + // find how many buckets this value occupies + while ((max_idx + 1 < get_width() ) && + (field->key_cmp((uchar *)histogram_bounds[max_idx + 1].data(), min_key) == 0)) { + max_idx++; + } + + //todo: do we need to account for zero value-length similarly to binary histograms. + + if (max_idx > min_idx) + { + // value spans multiple buckets + double bucket_sel= 1.0/(get_width() + 1); + sel= bucket_sel * (max_idx - min_idx + 1); + } else + { + // the value fits within a single bucket + sel = MIN(avg_sel, get_width()); + } + return sel; } /* @param field The table field histogram is for. We don't care about the @@ -1572,11 +1599,8 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint) if (res < 0) { low = mid + 1; min_bucket_index = mid; - } else if (res > 0) { + } else if (res >= 0) { high = mid - 1; - } else { - //todo: endpoint is on a bucket boundary - break; } } @@ -4326,7 +4350,7 @@ double get_column_range_cardinality(Field *field, if (hist && hist->is_usable(thd)) { res= col_non_nulls * - hist->point_selectivity(field, min_endp, max_endp, + hist->point_selectivity(field, min_endp, avg_frequency / col_non_nulls); } } @@ -4390,8 +4414,7 @@ double get_column_range_cardinality(Field *field, value. */ -double Histogram_binary::point_selectivity(Field *field, key_range *min_endp, - key_range *max_endp, double avg_sel) +double Histogram_binary::point_selectivity(Field *field, key_range *min_endp, double avg_sel) { double sel; Column_statistics *col_stats= field->read_stats; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 14913e200ea..33e0430450a 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -169,8 +169,7 @@ public: virtual void set_size(ulonglong sz)=0; - virtual double point_selectivity(Field *field, key_range *min_endp, - key_range *max_endp, double avg_selection)=0; + virtual double point_selectivity(Field *field, key_range *endpoint, double avg_selection)=0; virtual double range_selectivity(Field *field, key_range *min_endp, key_range *max_endp)=0; @@ -334,8 +333,7 @@ public: /* Estimate selectivity of "col=const" using a histogram */ - double point_selectivity(Field *field, key_range *min_endp, - key_range *max_endp, double avg_sel) override; + double point_selectivity(Field *field, key_range *endpoint, double avg_sel) override; }; class Histogram_json : public Histogram_base @@ -399,8 +397,7 @@ public: uchar *get_values() override { return (uchar *) values; } - double point_selectivity(Field *field, key_range *min_endp, - key_range *max_endp, double avg_selection) override; + double point_selectivity(Field *field, key_range *endpoint, double avg_selection) override; double range_selectivity(Field *field, key_range *min_endp, key_range *max_endp) override;