1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-36536 Add option to not collect statistics for long char/varchars

This is needed to make it easy for users to automatically ignore long
CHAR and VARCHAR columns when using ANALYZE TABLE PERSISTENT.
These fields can cause problems as they will consume
'CHARACTERS * MAX_CHARACTER_LENGTH * 2 * number_of_rows' space on disk
during analyze, which can easily be much bigger than the analyzed table.

This commit adds a new session system variable, analyze_max_length, with a
default value of 4G. Any field that is bigger than this in bytes will be
ignored by ANALYZE TABLE PERSISTENT unless it is specified in FOR COLUMNS().

While doing this patch, I noticed that we do not skip GEOMETRY columns in
ANALYZE TABLE, like we do with BLOB. This should be fixed when merging
to the 'main' branch. At the same time we should add a reasonable default
value for analyze_max_length, probably 1024, like we have for
max_sort_length.
This commit is contained in:
Monty
2025-04-21 19:12:58 +03:00
parent 2b448e7337
commit 1b934a387c
9 changed files with 115 additions and 4 deletions

View File

@ -453,5 +453,56 @@ SELECT * FROM t1 WHERE f LIKE '2023%';
f
DROP TABLE t1;
#
# MDEV-36536 Add option to not collect statistics for long char/varchars
#
select @@session.analyze_max_length;
@@session.analyze_max_length
4294967295
create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int)
character set utf8mb4 COLLATE utf8mb4_bin;
insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1);
ANALYZE TABLE t1 PERSISTENT FOR ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1'
test.t1 analyze status OK
select column_name from mysql.column_stats where table_name = 't1';
column_name
c0
c1
c2
i1
v1
v2
set @@session.analyze_max_length= 64;
truncate table mysql.column_stats;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'c2'
test.t1 analyze Warning Engine-independent statistics are not collected for column 'v2'
test.t1 analyze Warning Engine-independent statistics are not collected for column 'b1'
test.t1 analyze status Table is already up to date
select column_name from mysql.column_stats where table_name = 't1';
column_name
c0
c1
i1
v1
truncate table mysql.column_stats;
ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
select column_name from mysql.column_stats where table_name = 't1';
column_name
c0
c2
i1
v1
v2
set @@session.analyze_max_length= default;
drop table t1;
#
# End of 10.6 tests
#

View File

@ -306,6 +306,26 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM t1 WHERE f LIKE '2023%';
DROP TABLE t1;
--echo #
--echo # MDEV-36536 Add option to not collect statistics for long char/varchars
--echo #
select @@session.analyze_max_length;
create table t1 (c0 char(2), c1 char(16), c2 char(64), v1 varchar(16), v2 varchar(1000), b1 blob, i1 int)
character set utf8mb4 COLLATE utf8mb4_bin;
insert into t1 values ("A", "A","A","A","A","A",1), ("B","B","B","B","B","B",1);
ANALYZE TABLE t1 PERSISTENT FOR ALL;
select column_name from mysql.column_stats where table_name = 't1';
set @@session.analyze_max_length= 64;
truncate table mysql.column_stats;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
select column_name from mysql.column_stats where table_name = 't1';
truncate table mysql.column_stats;
ANALYZE TABLE t1 PERSISTENT for COLUMNS (c0,c2,v1,v2,i1) INDEXES ALL;
select column_name from mysql.column_stats where table_name = 't1';
set @@session.analyze_max_length= default;
drop table t1;
--echo #
--echo # End of 10.6 tests
--echo #

View File

@ -15,6 +15,10 @@ The following specify which files/extra groups are read (specified before remain
--alter-algorithm[=name]
Specify the alter table algorithm. One of: DEFAULT, COPY,
INPLACE, NOCOPY, INSTANT
--analyze-max-length=#
Fields that require more storage than analyze_max_length
and are not listed in ANALYZE ... FOR COLUMNS () will
automatically be skipped by ANALYZE TABLE PERSISTENT
--analyze-sample-percentage=#
Percentage of rows from the table ANALYZE TABLE will
sample to collect table statistics. Set to 0 to let

View File

@ -20,14 +20,14 @@ exec $MYSQLD_BOOTSTRAP_CMD --symbolic-links=0 --log-bin=foo --lower-case-table-n
perl;
# Variables which we don't want to display in the result file since
# their paths may vary:
# their paths may vary or they may use a default of 4294967295 :
@skipvars=qw/basedir open-files-limit general-log-file log plugin-dir plugin-maturity
log-slow-queries pid-file slow-query-log-file log-basename
datadir slave-load-tmpdir tmpdir socket thread-pool-size
large-files-support lower-case-file-system system-time-zone
collation-server character-set-server log-tc-size table-cache
table-open-cache table-open-cache-instances max-connections
server-uid tls-version version.*/;
server-uid tls-version version.* analyze-max-length/;
# Plugins which may or may not be there:
@plugins=qw/innodb archive blackhole federated partition s3

View File

@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME ANALYZE_MAX_LENGTH
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT
NUMERIC_MIN_VALUE 32
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
VARIABLE_SCOPE SESSION
VARIABLE_TYPE DOUBLE

View File

@ -22,6 +22,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DEFAULT,COPY,INPLACE,NOCOPY,INSTANT
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME ANALYZE_MAX_LENGTH
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Fields that require more storage than analyze_max_length and are not listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by ANALYZE TABLE PERSISTENT
NUMERIC_MIN_VALUE 32
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME ANALYZE_SAMPLE_PERCENTAGE
VARIABLE_SCOPE SESSION
VARIABLE_TYPE DOUBLE

View File

@ -988,8 +988,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
types here.
*/
enum enum_field_types type= field->type();
if (type < MYSQL_TYPE_TINY_BLOB ||
type > MYSQL_TYPE_BLOB)
if ((type < MYSQL_TYPE_TINY_BLOB ||
type > MYSQL_TYPE_BLOB) &&
field->field_length <= thd->variables.analyze_max_length)
{
field->register_field_in_read_map();
bitmap_set_bit(&tab->has_value_set, field->field_index);

View File

@ -731,6 +731,7 @@ typedef struct system_variables
ha_rows select_limit;
ha_rows max_join_size;
ha_rows expensive_subquery_limit;
ulong analyze_max_length;
ulong auto_increment_increment, auto_increment_offset;
#ifdef WITH_WSREP
/*

View File

@ -445,6 +445,20 @@ static Sys_var_double Sys_analyze_sample_percentage(
CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 100),
DEFAULT(100));
/*
  analyze_max_length: session variable, valid range 32 .. UINT_MAX32.
  Fields that require more storage than this value, and that are not
  explicitly listed in ANALYZE ... FOR COLUMNS (), are skipped by
  ANALYZE TABLE PERSISTENT.

  The max length (which is also the default) has to be UINT_MAX32 so that
  GEOMETRY fields are not removed from analyze.
*/
static Sys_var_ulong Sys_analyze_max_length(
"analyze_max_length",
"Fields that require more storage than analyze_max_length and are not "
"listed in ANALYZE ... FOR COLUMNS () will automatically be skipped by "
"ANALYZE TABLE PERSISTENT",
SESSION_VAR(analyze_max_length),
CMD_LINE(REQUIRED_ARG), VALID_RANGE(32, UINT_MAX32),
DEFAULT(UINT_MAX32), BLOCK_SIZE(1));
static Sys_var_ulong Sys_auto_increment_increment(
"auto_increment_increment",
"Auto-increment columns are incremented by this",