mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
Introduce analyze_sample_percentage variable
The variable controls the amount of sampling that ANALYZE TABLE performs. If ANALYZE TABLE with histogram collection is too slow, one can reduce the time taken by setting analyze_sample_percentage to a lower percentage of the total number of rows. Setting it to 0 will use a formula to compute how many rows to sample: the number of rows collected has a minimum of 50000 and increases logarithmically with a coefficient of 4096. The coefficient is chosen so that we expect an error of less than 3% in our estimations according to the paper: "Random Sampling for Histogram Construction: How much is enough?" – Surajit Chaudhuri, Rajeev Motwani, Vivek Narasayya, ACM SIGMOD, 1998. The drawback of sampling is that the avg_frequency number is computed imprecisely and will yield a smaller number than the real one.
This commit is contained in:
@ -1760,3 +1760,107 @@ DROP TABLE t1;
|
||||
# End of 10.2 tests
|
||||
#
|
||||
set histogram_size=@save_hist_size, histogram_type=@save_hist_type;
|
||||
#
|
||||
# Start of 10.4 tests
|
||||
#
|
||||
#
|
||||
# Test analyze_sample_percentage system variable.
|
||||
#
|
||||
set @save_use_stat_tables=@@use_stat_tables;
|
||||
set @save_analyze_sample_percentage=@@analyze_sample_percentage;
|
||||
set session rand_seed1=42;
|
||||
set session rand_seed2=62;
|
||||
set use_stat_tables=PREFERABLY;
|
||||
set histogram_size=10;
|
||||
CREATE TABLE t1 (id int);
|
||||
INSERT INTO t1 (id) VALUES (1), (1), (1), (1), (1), (1), (1);
|
||||
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||
INSERT INTO t1 SELECT id+1 FROM t1;
|
||||
INSERT INTO t1 SELECT id+2 FROM t1;
|
||||
INSERT INTO t1 SELECT id+4 FROM t1;
|
||||
INSERT INTO t1 SELECT id+8 FROM t1;
|
||||
INSERT INTO t1 SELECT id+16 FROM t1;
|
||||
INSERT INTO t1 SELECT id+32 FROM t1;
|
||||
INSERT INTO t1 SELECT id+64 FROM t1;
|
||||
INSERT INTO t1 SELECT id+128 FROM t1;
|
||||
INSERT INTO t1 SELECT id+256 FROM t1;
|
||||
INSERT INTO t1 SELECT id+512 FROM t1;
|
||||
INSERT INTO t1 SELECT id+1024 FROM t1;
|
||||
INSERT INTO t1 SELECT id+2048 FROM t1;
|
||||
INSERT INTO t1 SELECT id+4096 FROM t1;
|
||||
INSERT INTO t1 SELECT id+9192 FROM t1;
|
||||
#
|
||||
# This query should show a full table scan analysis.
|
||||
#
|
||||
ANALYZE TABLE t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status OK
|
||||
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||
DECODE_HISTOGRAM(hist_type, histogram)
|
||||
from mysql.column_stats;
|
||||
table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
|
||||
t1 id 1 17384 0.0000 4.0000 14.0000 0.082,0.086,0.086,0.086,0.086,0.141,0.086,0.086,0.086,0.086,0.086
|
||||
set analyze_sample_percentage=0.1;
|
||||
#
|
||||
# This query will show an inaccurate avg_frequency value.
|
||||
#
|
||||
ANALYZE TABLE t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||
DECODE_HISTOGRAM(hist_type, histogram)
|
||||
from mysql.column_stats;
|
||||
table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
|
||||
t1 id 111 17026 0.0000 4.0000 1.0047 0.039,0.098,0.055,0.118,0.078,0.157,0.082,0.118,0.094,0.063,0.098
|
||||
#
|
||||
# This query will show a better avg_frequency value.
|
||||
#
|
||||
set analyze_sample_percentage=25;
|
||||
ANALYZE TABLE t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||
DECODE_HISTOGRAM(hist_type, histogram)
|
||||
from mysql.column_stats;
|
||||
table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
|
||||
t1 id 1 17384 0.0000 4.0000 3.5736 0.082,0.086,0.086,0.082,0.086,0.145,0.086,0.086,0.082,0.086,0.090
|
||||
set analyze_sample_percentage=0;
|
||||
#
|
||||
# Test self adjusting sampling level.
|
||||
#
|
||||
ANALYZE TABLE t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||
DECODE_HISTOGRAM(hist_type, histogram)
|
||||
from mysql.column_stats;
|
||||
table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
|
||||
t1 id 1 17384 0.0000 4.0000 7.4523 0.082,0.090,0.086,0.082,0.086,0.145,0.086,0.082,0.086,0.086,0.086
|
||||
#
|
||||
# Test record estimation is working properly.
|
||||
#
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
229376
|
||||
explain select * from t1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229060
|
||||
set analyze_sample_percentage=100;
|
||||
ANALYZE TABLE t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
|
||||
DECODE_HISTOGRAM(hist_type, histogram)
|
||||
from mysql.column_stats;
|
||||
table_name column_name min_value max_value nulls_ratio avg_length avg_frequency DECODE_HISTOGRAM(hist_type, histogram)
|
||||
t1 id 1 17384 0.0000 4.0000 14.0000 0.082,0.086,0.086,0.086,0.086,0.141,0.086,0.086,0.086,0.086,0.086
|
||||
explain select * from t1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
|
||||
set use_stat_tables=@save_use_stat_tables;
|
||||
drop table t1;
|
||||
|
Reference in New Issue
Block a user