From 1b934a387ccfa91dd0cbbc3e4fd200fabe1b7fc1 Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 21 Apr 2025 19:12:58 +0300 Subject: [PATCH] MDEV-36536 Add option to not collect statistics for long char/varchars This is needed to make it easy for users to automatically ignore long chars and varchars when using ANALYZE TABLE PERSISTENT. These fields can cause problems as they will consume 'CHARACTERS * MAX_CHARACTER_LENGTH * 2 * number_of_rows' space on disk during analyze, which can easily be much bigger than the analyzed table. This commit adds a new system variable, analyze_max_length, with a default value of 4G. Any field that is bigger than this in bytes will be ignored by ANALYZE TABLE PERSISTENT unless it is specified in FOR COLUMNS(). While doing this patch, I noticed that we do not skip GEOMETRY columns from ANALYZE TABLE, like we do with BLOB. This should be fixed when merging to the 'main' branch. At the same time we should add a reasonable default value for analyze_max_length, probably 1024, like we have for max_sort_length. 
--- mysql-test/main/analyze.result | 51 +++++++++++++++++++ mysql-test/main/analyze.test | 20 ++++++++ mysql-test/main/mysqld--help.result | 4 ++ mysql-test/main/mysqld--help.test | 4 +- .../sys_vars/r/sysvars_server_embedded.result | 10 ++++ .../r/sysvars_server_notembedded.result | 10 ++++ sql/sql_admin.cc | 5 +- sql/sql_class.h | 1 + sql/sys_vars.cc | 14 +++++ 9 files changed, 115 insertions(+), 4 deletions(-) diff --git a/mysql-test/main/analyze.result b/mysql-test/main/analyze.result index 8819f15f27b..cf86bb31e7d 100644 --- a/mysql-test/main/analyze.result +++ b/mysql-test/main/analyze.result @@ -453,5 +453,56 @@ SELECT * FROM t1 WHERE f LIKE '2023%'; f DROP TABLE t1; # +# MDEV-36536 Add option to not collect statistics for long char/varchars +# +select @@session.analyze_max_length; +@@session.analyze_max_length +4294967295 +create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int) +character set utf8mb4 COLLATE utf8mb4_bin; +insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1); +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1' +test.t1 analyze status OK +select column_name from mysql.column_stats where table_name = 't1'; +column_name +c0 +c1 +c2 +i1 +v1 +v2 +set @@session.analyze_max_length= 64; +truncate table mysql.column_stats; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze Warning Engine-independent statistics are not collected for column 'c2' +test.t1 analyze Warning Engine-independent statistics are not collected for column 'v2' +test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1' +test.t1 analyze status Table is already up to date +select column_name from mysql.column_stats 
where table_name = 't1'; +column_name +c0 +c1 +i1 +v1 +truncate table mysql.column_stats; +ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +select column_name from mysql.column_stats where table_name = 't1'; +column_name +c0 +c2 +i1 +v1 +v2 +set @@session.analyze_max_length= default; +drop table t1; +# # End of 10.6 tests # diff --git a/mysql-test/main/analyze.test b/mysql-test/main/analyze.test index fb07e11b0c6..768b18dcb73 100644 --- a/mysql-test/main/analyze.test +++ b/mysql-test/main/analyze.test @@ -306,6 +306,26 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM t1 WHERE f LIKE '2023%'; DROP TABLE t1; +--echo # +--echo # MDEV-36536 Add option to not collect statistics for long char/varchars +--echo # + +select @@session.analyze_max_length; +create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int) +character set utf8mb4 COLLATE utf8mb4_bin; +insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1); +ANALYZE TABLE t1 PERSISTENT FOR ALL; +select column_name from mysql.column_stats where table_name = 't1'; +set @@session.analyze_max_length= 64; +truncate table mysql.column_stats; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +select column_name from mysql.column_stats where table_name = 't1'; +truncate table mysql.column_stats; +ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL; +select column_name from mysql.column_stats where table_name = 't1'; +set @@session.analyze_max_length= default; +drop table t1; + --echo # --echo # End of 10.6 tests --echo # diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 8857aff2fc2..f4aa81e0d66 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -15,6 +15,10 @@ The following specify which files/extra 
groups are read (specified before remain --alter-algorithm[=name] Specify the alter table algorithm. One of: DEFAULT, COPY, INPLACE, NOCOPY, INSTANT + --analyze-max-length=# + Fields that require more storage than analyze_max_length + and are not listed in ANALYZE ... FOR COLUMNS () will + automatically be skipped by ANALYZE TABLE PERSISTENT --analyze-sample-percentage=# Percentage of rows from the table ANALYZE TABLE will sample to collect table statistics. Set to 0 to let diff --git a/mysql-test/main/mysqld--help.test b/mysql-test/main/mysqld--help.test index 971983fd66c..44449cd3ad5 100644 --- a/mysql-test/main/mysqld--help.test +++ b/mysql-test/main/mysqld--help.test @@ -20,14 +20,14 @@ exec $MYSQLD_BOOTSTRAP_CMD --symbolic-links=0 --log-bin=foo --lower-case-table-n perl; # Variables which we don't want to display in the result file since - # their paths may vary: + # their paths may vary or they may use a default of 4294967295 : @skipvars=qw/basedir open-files-limit general-log-file log plugin-dir plugin-maturity log-slow-queries pid-file slow-query-log-file log-basename datadir slave-load-tmpdir tmpdir socket thread-pool-size large-files-support lower-case-file-system system-time-zone collation-server character-set-server log-tc-size table-cache table-open-cache table-open-cache-instances max-connections - server-uid tls-version version.*/; + server-uid tls-version version.* analyze-max-length/; # Plugins which may or may not be there: @plugins=qw/innodb archive blackhole federated partition s3 diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 7ec89b4dbf3..9eb4e5687d1 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME ANALYZE_MAX_LENGTH 
+VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT +NUMERIC_MIN_VALUE 32 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE VARIABLE_SCOPE SESSION VARIABLE_TYPE DOUBLE diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index c8f0760e38b..7f08e1bbfc2 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME ANALYZE_MAX_LENGTH +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT +NUMERIC_MIN_VALUE 32 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE VARIABLE_SCOPE SESSION VARIABLE_TYPE DOUBLE diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index fd2d5178f1d..1d578266c98 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -988,8 +988,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, types here. 
*/ enum enum_field_types type= field->type(); - if (type < MYSQL_TYPE_TINY_BLOB || - type > MYSQL_TYPE_BLOB) + if ((type < MYSQL_TYPE_TINY_BLOB || + type > MYSQL_TYPE_BLOB) && + field->field_length <= thd->variables.analyze_max_length) { field->register_field_in_read_map(); bitmap_set_bit(&tab->has_value_set, field->field_index); diff --git a/sql/sql_class.h b/sql/sql_class.h index 69b021cd41d..aac433181a4 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -731,6 +731,7 @@ typedef struct system_variables ha_rows select_limit; ha_rows max_join_size; ha_rows expensive_subquery_limit; + ulong analyze_max_length; ulong auto_increment_increment, auto_increment_offset; #ifdef WITH_WSREP /* diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 0d4a8e4c6fe..4ae14a58feb 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -445,6 +445,20 @@ static Sys_var_double Sys_analyze_sample_percentage( CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100), DEFAULT(100)); +/* + The max length has to be UINT_MAX32 so that GEOMETRY fields are not + removed from analyze. +*/ + +static Sys_var_ulong Sys_analyze_max_length( "analyze_max_length", "Fields that require more storage than analyze_max_length and are not " "listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by " "ANALYZE TABLE PERSISTENT", SESSION_VAR(analyze_max_length), CMD_LINE(REQUIRED_ARG), VALID_RANGE(32, UINT_MAX32), DEFAULT(UINT_MAX32), BLOCK_SIZE(1)); + static Sys_var_ulong Sys_auto_increment_increment( "auto_increment_increment", "Auto-increment columns are incremented by this",