From f3f78bed8530e1e858d5ed87054f2ac672760824 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 18 Oct 2021 16:31:18 +0300 Subject: [PATCH] MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram Fix a bug in position_in_interval(). Do not overwrite one interval endpoint with another. --- mysql-test/main/statistics_json.result | 26 +++++++++++++++++++++++--- mysql-test/main/statistics_json.test | 19 +++++++++++++++++++ sql/opt_histogram_json.cc | 25 +++++++++++++++++++------ 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 9908d93c15b..3cd9bfb181f 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -4243,12 +4243,12 @@ test t1_json a a-0 a-9 0.0000 3.0000 1.0000 10 JSON_HB { } explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 50.00 Using where +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 58.71 Using where Warnings: Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` between 'a-3a' and 'zzzzzzzzz' analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 50.00 60.00 Using where +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 58.71 60.00 Using where explain extended select * from t1_json where a < 'b-1a'; id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 90.00 Using where @@ -7280,7 +7280,7 @@ Percentage 0.0 99.9 47 JSON_HB { } analyze select * from Country use index () where Code between 'BBC' and 'GGG'; id select_type table type possible_keys key key_len ref rows r_rows filtered 
r_filtered Extra -1 SIMPLE Country ALL NULL NULL NULL NULL 239 239.00 25.10 25.52 Using where +1 SIMPLE Country ALL NULL NULL NULL NULL 239 239.00 24.58 25.52 Using where analyze select * from Country use index () where Code < 'BBC'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE Country ALL NULL NULL NULL NULL 239 239.00 8.37 7.11 Using where @@ -7510,3 +7510,23 @@ histogram ] } drop table t1; +# +# MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram +# +create table t1 (c char(8)); +insert into t1 values ('1x'); +insert into t1 values ('1x'); +insert into t1 values ('1xx'); +insert into t1 values ('0xx'); +insert into t1 select * from t1; +insert into t1 select * from t1; +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +analyze +select c from t1 where c > '1'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where +drop table t1; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 7325d56e912..352150e8497 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -276,3 +276,22 @@ select histogram from mysql.column_stats where table_name = 't1'; drop table t1; +--echo # +--echo # MDEV-26750: Estimation for filtered rows is far off with JSON_HB histogram +--echo # +create table t1 (c char(8)); + +insert into t1 values ('1x'); +insert into t1 values ('1x'); +insert into t1 values ('1xx'); +insert into t1 values ('0xx'); +insert into t1 select * from t1; +insert into t1 select * from t1; + +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +analyze +select c from t1 where c > '1'; + +drop table t1; + diff --git a/sql/opt_histogram_json.cc 
b/sql/opt_histogram_json.cc index 944c5ffb353..1793aa7df0d 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -569,21 +569,34 @@ double position_in_interval(Field *field, const uchar *key, uint key_len, if (field->pos_through_val_str()) { StringBuffer<64> buf1, buf2, buf3; - String empty_buf1, empty_buf2, empty_buf3; store_key_image_to_rec_no_null(field, left.data(), left.size()); - String *min_str= field->val_str(&buf1, &empty_buf1); + String *min_str= field->val_str(&buf1); + /* + Make sure we've saved a copy of the data, not a pointer into + field->ptr. We will overwrite the contents of field->ptr with the next + store_key_image_to_rec_no_null() call. + */ + if (&buf1 != min_str) + buf1.copy(*min_str); + else + buf1.copy(); store_key_image_to_rec_no_null(field, right.data(), right.size()); - String *max_str= field->val_str(&buf2, &empty_buf2); + String *max_str= field->val_str(&buf2); + /* Same as above */ + if (&buf2 != max_str) + buf2.copy(*max_str); + else + buf2.copy(); store_key_image_to_rec_no_null(field, (const char*)key, key_len); - String *midp_str= field->val_str(&buf3, &empty_buf3); + String *midp_str= field->val_str(&buf3); res= pos_in_interval_for_string(field->charset(), (const uchar*)midp_str->ptr(), midp_str->length(), - (const uchar*)min_str->ptr(), min_str->length(), - (const uchar*)max_str->ptr(), max_str->length()); + (const uchar*)buf1.ptr(), buf1.length(), + (const uchar*)buf2.ptr(), buf2.length()); } else {