mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-36536 Add option to not collect statistics for long char/varchars
This is needed to make it easy for users to automatically ignore long char and varchar columns when using ANALYZE TABLE PERSISTENT. These fields can cause problems, as they will consume 'CHARACTERS * MAX_CHARACTER_LENGTH * 2 * number_of_rows' space on disk during analyze, which can easily be much bigger than the analyzed table. This commit adds a new session system variable, analyze_max_length, default value 4G. Any field that is bigger than this in bytes will be ignored by ANALYZE TABLE PERSISTENT unless it is specified in FOR COLUMNS(). While doing this patch, I noticed that we do not skip GEOMETRY columns from ANALYZE TABLE, like we do with BLOB. This should be fixed when merging to the 'main' branch. At the same time we should add a reasonable default value for analyze_max_length, probably 1024, like we have for max_sort_length.
This commit is contained in:
@ -453,5 +453,56 @@ SELECT * FROM t1 WHERE f LIKE '2023%';
|
||||
f
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-36536 Add option to not collect statistics for long char/varchars
|
||||
#
|
||||
select @@session.analyze_max_length;
|
||||
@@session.analyze_max_length
|
||||
4294967295
|
||||
create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int)
|
||||
character set utf8mb4 COLLATE utf8mb4_bin;
|
||||
insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1);
|
||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1'
|
||||
test.t1 analyze status OK
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
column_name
|
||||
c0
|
||||
c1
|
||||
c2
|
||||
i1
|
||||
v1
|
||||
v2
|
||||
set @@session.analyze_max_length= 64;
|
||||
truncate table mysql.column_stats;
|
||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze Warning Engine-independent statistics are not collected for column 'c2'
|
||||
test.t1 analyze Warning Engine-independent statistics are not collected for column 'v2'
|
||||
test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1'
|
||||
test.t1 analyze status Table is already up to date
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
column_name
|
||||
c0
|
||||
c1
|
||||
i1
|
||||
v1
|
||||
truncate table mysql.column_stats;
|
||||
ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
column_name
|
||||
c0
|
||||
c2
|
||||
i1
|
||||
v1
|
||||
v2
|
||||
set @@session.analyze_max_length= default;
|
||||
drop table t1;
|
||||
#
|
||||
# End of 10.6 tests
|
||||
#
|
||||
|
@ -306,6 +306,26 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
SELECT * FROM t1 WHERE f LIKE '2023%';
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-36536 Add option to not collect statistics for long char/varchars
|
||||
--echo #
|
||||
|
||||
select @@session.analyze_max_length;
|
||||
create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int)
|
||||
character set utf8mb4 COLLATE utf8mb4_bin;
|
||||
insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1);
|
||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
set @@session.analyze_max_length= 64;
|
||||
truncate table mysql.column_stats;
|
||||
ANALYZE TABLE t1 PERSISTENT FOR ALL;
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
truncate table mysql.column_stats;
|
||||
ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL;
|
||||
select column_name from mysql.column_stats where table_name = 't1';
|
||||
set @@session.analyze_max_length= default;
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.6 tests
|
||||
--echo #
|
||||
|
@ -15,6 +15,10 @@ The following specify which files/extra groups are read (specified before remain
|
||||
--alter-algorithm[=name]
|
||||
Specify the alter table algorithm. One of: DEFAULT, COPY,
|
||||
INPLACE, NOCOPY, INSTANT
|
||||
--analyze-max-length=#
|
||||
Fields that require more storage than analyze_max_length
|
||||
and are not listed in ANALYZE ... FOR COLUMNS () will
|
||||
automatically be skipped by ANALYZE TABLE PERSISTENT
|
||||
--analyze-sample-percentage=#
|
||||
Percentage of rows from the table ANALYZE TABLE will
|
||||
sample to collect table statistics. Set to 0 to let
|
||||
|
@ -20,14 +20,14 @@ exec $MYSQLD_BOOTSTRAP_CMD --symbolic-links=0 --log-bin=foo --lower-case-table-n
|
||||
|
||||
perl;
|
||||
# Variables which we don't want to display in the result file since
|
||||
# their paths may vary:
|
||||
# their paths may vary or they may use a default of 4294967295 :
|
||||
@skipvars=qw/basedir open-files-limit general-log-file log plugin-dir plugin-maturity
|
||||
log-slow-queries pid-file slow-query-log-file log-basename
|
||||
datadir slave-load-tmpdir tmpdir socket thread-pool-size
|
||||
large-files-support lower-case-file-system system-time-zone
|
||||
collation-server character-set-server log-tc-size table-cache
|
||||
table-open-cache table-open-cache-instances max-connections
|
||||
server-uid tls-version version.*/;
|
||||
server-uid tls-version version.* analyze-max-length/;
|
||||
|
||||
# Plugins which may or may not be there:
|
||||
@plugins=qw/innodb archive blackhole federated partition s3
|
||||
|
@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT OPTIONAL
|
||||
VARIABLE_NAME ANALYZE_MAX_LENGTH
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT
|
||||
NUMERIC_MIN_VALUE 32
|
||||
NUMERIC_MAX_VALUE 4294967295
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE DOUBLE
|
||||
|
@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT OPTIONAL
|
||||
VARIABLE_NAME ANALYZE_MAX_LENGTH
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT
|
||||
NUMERIC_MIN_VALUE 32
|
||||
NUMERIC_MAX_VALUE 4294967295
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE DOUBLE
|
||||
|
@ -988,8 +988,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
|
||||
types here.
|
||||
*/
|
||||
enum enum_field_types type= field->type();
|
||||
if (type < MYSQL_TYPE_TINY_BLOB ||
|
||||
type > MYSQL_TYPE_BLOB)
|
||||
if ((type < MYSQL_TYPE_TINY_BLOB ||
|
||||
type > MYSQL_TYPE_BLOB) &&
|
||||
field->field_length <= thd->variables.analyze_max_length)
|
||||
{
|
||||
field->register_field_in_read_map();
|
||||
bitmap_set_bit(&tab->has_value_set, field->field_index);
|
||||
|
@ -731,6 +731,7 @@ typedef struct system_variables
|
||||
ha_rows select_limit;
|
||||
ha_rows max_join_size;
|
||||
ha_rows expensive_subquery_limit;
|
||||
ulong analyze_max_length;
|
||||
ulong auto_increment_increment, auto_increment_offset;
|
||||
#ifdef WITH_WSREP
|
||||
/*
|
||||
|
@ -445,6 +445,20 @@ static Sys_var_double Sys_analyze_sample_percentage(
|
||||
CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100),
|
||||
DEFAULT(100));
|
||||
|
||||
/*
|
||||
The max length has to be UINT_MAX32 to not remove GEOMETRY fields
|
||||
from analyze.
|
||||
*/
|
||||
|
||||
static Sys_var_ulong Sys_analyze_max_length(
|
||||
"analyze_max_length",
|
||||
"Fields that require more storage than analyze_max_length and are not "
|
||||
"listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by "
|
||||
"ANALYZE TABLE PERSISTENT",
|
||||
SESSION_VAR(analyze_max_length),
|
||||
CMD_LINE(REQUIRED_ARG), VALID_RANGE(32, UINT_MAX32),
|
||||
DEFAULT(UINT_MAX32), BLOCK_SIZE(1));
|
||||
|
||||
static Sys_var_ulong Sys_auto_increment_increment(
|
||||
"auto_increment_increment",
|
||||
"Auto-increment columns are incremented by this",
|
||||
|
Reference in New Issue
Block a user