From 9cd4d4984025857782e12e53d32cea5e4b7684e5 Mon Sep 17 00:00:00 2001 From: Jimmy Yang Date: Fri, 14 Jan 2011 09:02:28 -0800 Subject: [PATCH] Fix Bug#30423 "InnoDBs treatment of NULL in index stats causes bad "rows examined" estimates". This change implements "innodb_stats_method" with options of "nulls_equal", "nulls_unequal" and "null_ignored". rb://553 approved by Marko --- .../suite/innodb/r/innodb_bug30423.result | 95 ++++++++ .../suite/innodb/t/innodb_bug30423.test | 211 ++++++++++++++++++ .../innodb_plugin/r/innodb_bug30423.result | 95 ++++++++ .../innodb_plugin/t/innodb_bug30423.test | 211 ++++++++++++++++++ storage/innobase/btr/btr0cur.c | 146 +++++++++--- storage/innobase/dict/dict0dict.c | 10 + storage/innobase/handler/ha_innodb.cc | 95 +++++++- storage/innobase/include/btr0cur.h | 5 +- storage/innobase/include/dict0mem.h | 6 + storage/innobase/include/rem0cmp.h | 4 + storage/innobase/include/rem0cmp.ic | 2 +- storage/innobase/include/srv0srv.h | 18 ++ storage/innobase/rem/rem0cmp.c | 14 +- storage/innobase/srv/srv0srv.c | 5 + storage/innodb_plugin/ChangeLog | 8 + storage/innodb_plugin/btr/btr0cur.c | 150 ++++++++++--- storage/innodb_plugin/dict/dict0dict.c | 10 + storage/innodb_plugin/handler/ha_innodb.cc | 95 +++++++- storage/innodb_plugin/include/btr0cur.h | 5 +- storage/innodb_plugin/include/dict0mem.h | 6 + storage/innodb_plugin/include/rem0cmp.h | 4 + storage/innodb_plugin/include/rem0cmp.ic | 2 +- storage/innodb_plugin/include/srv0srv.h | 18 ++ storage/innodb_plugin/rem/rem0cmp.c | 14 +- storage/innodb_plugin/srv/srv0srv.c | 5 + 25 files changed, 1157 insertions(+), 77 deletions(-) create mode 100644 mysql-test/suite/innodb/r/innodb_bug30423.result create mode 100644 mysql-test/suite/innodb/t/innodb_bug30423.test create mode 100644 mysql-test/suite/innodb_plugin/r/innodb_bug30423.result create mode 100644 mysql-test/suite/innodb_plugin/t/innodb_bug30423.test diff --git a/mysql-test/suite/innodb/r/innodb_bug30423.result b/mysql-test/suite/innodb/r/innodb_bug30423.result new file mode 100644 index 00000000000..a19809366ae --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug30423.result @@ -0,0 +1,95 @@ +set global innodb_stats_method = default; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +select count(*) from bug30243_3 where org_id is not NULL; +count(*) +20 +select count(*) from bug30243_3 where org_id is NULL; +count(*) +16384 +select count(*) from bug30243_2 where org_id is not NULL; +count(*) +224 +select count(*) from bug30243_2 where org_id is NULL; +count(*) +65536 +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +set global innodb_stats_method = "NULL"; +ERROR 42000: Variable 'stats_method' can't be set to the value of 'NULL' +set global innodb_stats_method = "nulls_ignored"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id ; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +set global innodb_stats_method = "nulls_unequal"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_unequal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; +COUNT(*) +1024 +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = nulls_equal; +drop table bug30243_2; +drop table bug30243_1; +drop table bug30243_3; +drop table table_bug30423; diff --git a/mysql-test/suite/innodb/t/innodb_bug30423.test b/mysql-test/suite/innodb/t/innodb_bug30423.test new file mode 100644 index 00000000000..f2a3ee8d099 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug30423.test @@ -0,0 +1,211 @@ +# Test for Bug #30423, InnoDBs treatment of NULL in index stats causes +# bad "rows examined" estimates. +# Implemented InnoDB system variable "innodb_stats_method" with +# "nulls_equal" (default), "nulls_unequal", and "nulls_ignored" options. + +-- source include/have_innodb.inc + +let $innodb_stats_method_orig = `select @@innodb_stats_method`; + +# default setting for innodb_stats_method is "nulls_equal" +set global innodb_stats_method = default; + +select @@innodb_stats_method; + +# create three tables, bug30243_1, bug30243_2 and bug30243_3. +# The test scenario is adopted from original bug #30423 report. +# table bug30243_1 and bug30243_3 have many NULL values + +-- disable_result_log +-- disable_query_log + +DROP TABLE IF EXISTS bug30243_1; +CREATE TABLE bug30243_1 ( + org_id int(11) NOT NULL default '0', + UNIQUE KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +LOCK TABLES bug30243_1 WRITE; +INSERT INTO bug30243_1 VALUES (11),(15),(16),(17),(19),(20),(21),(23),(24), +(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(37),(38),(40),(41), +(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56), +(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71), +(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86), +(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101), +(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127), +(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140), +(141),(142),(143),(144),(145); +UNLOCK TABLES; + +DROP TABLE IF EXISTS bug30243_3; +CREATE TABLE bug30243_3 ( + org_id int(11) default NULL, + KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_3 VALUES (NULL); + +begin; +let $i=14; +while ($i) +{ + INSERT INTO bug30243_3 SELECT NULL FROM bug30243_3; + dec $i; +} + +INSERT INTO bug30243_3 VALUES (34),(34),(35),(56),(58),(62),(62),(64),(65),(66),(80),(135),(137),(138),(139),(140),(142),(143),(144),(145); +commit; + +DROP TABLE IF EXISTS bug30243_2; +CREATE TABLE bug30243_2 ( + org_id int(11) default NULL, + KEY `contacts$org_id` (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_2 VALUES (NULL); + +begin; +let $i=16; +while ($i) +{ + INSERT INTO bug30243_2 SELECT NULL FROM bug30243_2; + dec $i; +} + +INSERT INTO bug30243_2 VALUES (11),(15),(16),(17),(20),(21),(23),(24),(25), +(26),(27),(28),(29),(30),(31),(32),(33),(34),(37),(38),(40),(41),(42),(43), +(44),(45),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(48), +(48),(50),(51),(52),(52),(53),(54),(55),(57),(60),(61),(62),(62),(62),(62), +(62),(63),(64),(64),(65),(66),(66),(67),(68),(69),(70),(71),(72),(73),(74), +(75),(76),(77),(78),(79),(80),(80),(81),(82),(83),(84),(85),(86),(87),(88), +(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102), +(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126), +(127),(128),(129),(130),(131),(132),(133),(133),(135),(135),(135),(135), +(136),(136),(138),(138),(139),(139),(139),(140),(141),(141),(142),(143), +(143),(145),(145); +commit; + + +-- enable_result_log +-- enable_query_log + +# check tables's value +select count(*) from bug30243_3 where org_id is not NULL; +select count(*) from bug30243_3 where org_id is NULL; + +select count(*) from bug30243_2 where org_id is not NULL; +select count(*) from bug30243_2 where org_id is NULL; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we over estimate the rows per +# unique value (since there are many NULLs). +# Skip this query log since the stats estimate could vary from runs +-- disable_query_log +-- disable_result_log +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; +-- enable_query_log +-- enable_result_log + +# following set operation will fail +#--error ER_WRONG_VALUE_FOR_VAR +--error 1231 +set global innodb_stats_method = "NULL"; + +set global innodb_stats_method = "nulls_ignored"; + +select @@innodb_stats_method; + +# Regenerate the stats with "nulls_ignored" option + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (should be approximately 1 row per value) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; + +select @@innodb_stats_method; + +# Try the "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (~1) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id; + + +# Create a table with all NULL values, make sure the stats calculation +# does not crash with table of all NULL values +-- disable_query_log +CREATE TABLE table_bug30423 ( + org_id int(11) default NULL, + KEY(org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO `table_bug30423` VALUES (NULL); + +begin; +let $i=10; +while ($i) +{ + INSERT INTO table_bug30423 SELECT NULL FROM table_bug30423; + dec $i; +} +commit; + +-- enable_query_log + +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; + +# calculate the statistics for the table for "nulls_ignored" and +# "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; + +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; + + +eval set global innodb_stats_method = $innodb_stats_method_orig; + +drop table bug30243_2; + +drop table bug30243_1; + +drop table bug30243_3; + +drop table table_bug30423; diff --git a/mysql-test/suite/innodb_plugin/r/innodb_bug30423.result b/mysql-test/suite/innodb_plugin/r/innodb_bug30423.result new file mode 100644 index 00000000000..a19809366ae --- /dev/null +++ b/mysql-test/suite/innodb_plugin/r/innodb_bug30423.result @@ -0,0 +1,95 @@ +set global innodb_stats_method = default; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +select count(*) from bug30243_3 where org_id is not NULL; +count(*) +20 +select count(*) from bug30243_3 where org_id is NULL; +count(*) +16384 +select count(*) from bug30243_2 where org_id is not NULL; +count(*) +224 +select count(*) from bug30243_2 where org_id is NULL; +count(*) +65536 +select @@innodb_stats_method; +@@innodb_stats_method +nulls_equal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +set global innodb_stats_method = "NULL"; +ERROR 42000: Variable 'stats_method' can't be set to the value of 'NULL' +set global innodb_stats_method = "nulls_ignored"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id ; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +select @@innodb_stats_method; +@@innodb_stats_method +nulls_ignored +set global innodb_stats_method = "nulls_unequal"; +select @@innodb_stats_method; +@@innodb_stats_method +nulls_unequal +analyze table bug30243_1; +Table Op Msg_type Msg_text +test.bug30243_1 analyze status OK +analyze table bug30243_2; +Table Op Msg_type Msg_text +test.bug30243_2 analyze status OK +analyze table bug30243_3; +Table Op Msg_type Msg_text +test.bug30243_3 analyze status OK +explain SELECT COUNT(*), 0 +FROM bug30243_1 orgs +LEFT JOIN bug30243_3 sa_opportunities +ON orgs.org_id=sa_opportunities.org_id +LEFT JOIN bug30243_2 contacts +ON orgs.org_id=contacts.org_id; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; +COUNT(*) +1024 +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; +Table Op Msg_type Msg_text +test.table_bug30423 analyze status OK +set global innodb_stats_method = nulls_equal; +drop table bug30243_2; +drop table bug30243_1; +drop table bug30243_3; +drop table table_bug30423; diff --git a/mysql-test/suite/innodb_plugin/t/innodb_bug30423.test b/mysql-test/suite/innodb_plugin/t/innodb_bug30423.test new file mode 100644 index 00000000000..458c2967e19 --- /dev/null +++ b/mysql-test/suite/innodb_plugin/t/innodb_bug30423.test @@ -0,0 +1,211 @@ +# Test for Bug #30423, InnoDBs treatment of NULL in index stats causes +# bad "rows examined" estimates. +# Implemented InnoDB system variable "innodb_stats_method" with +# "nulls_equal" (default), "nulls_unequal", and "nulls_ignored" options. + +-- source include/have_innodb_plugin.inc + +let $innodb_stats_method_orig = `select @@innodb_stats_method`; + +# default setting for innodb_stats_method is "nulls_equal" +set global innodb_stats_method = default; + +select @@innodb_stats_method; + +# create three tables, bug30243_1, bug30243_2 and bug30243_3. +# The test scenario is adopted from original bug #30423 report. +# table bug30243_1 and bug30243_3 have many NULL values + +-- disable_result_log +-- disable_query_log + +DROP TABLE IF EXISTS bug30243_1; +CREATE TABLE bug30243_1 ( + org_id int(11) NOT NULL default '0', + UNIQUE KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +LOCK TABLES bug30243_1 WRITE; +INSERT INTO bug30243_1 VALUES (11),(15),(16),(17),(19),(20),(21),(23),(24), +(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(37),(38),(40),(41), +(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56), +(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71), +(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86), +(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101), +(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127), +(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140), +(141),(142),(143),(144),(145); +UNLOCK TABLES; + +DROP TABLE IF EXISTS bug30243_3; +CREATE TABLE bug30243_3 ( + org_id int(11) default NULL, + KEY (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_3 VALUES (NULL); + +begin; +let $i=14; +while ($i) +{ + INSERT INTO bug30243_3 SELECT NULL FROM bug30243_3; + dec $i; +} + +INSERT INTO bug30243_3 VALUES (34),(34),(35),(56),(58),(62),(62),(64),(65),(66),(80),(135),(137),(138),(139),(140),(142),(143),(144),(145); +commit; + +DROP TABLE IF EXISTS bug30243_2; +CREATE TABLE bug30243_2 ( + org_id int(11) default NULL, + KEY `contacts$org_id` (org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO bug30243_2 VALUES (NULL); + +begin; +let $i=16; +while ($i) +{ + INSERT INTO bug30243_2 SELECT NULL FROM bug30243_2; + dec $i; +} + +INSERT INTO bug30243_2 VALUES (11),(15),(16),(17),(20),(21),(23),(24),(25), +(26),(27),(28),(29),(30),(31),(32),(33),(34),(37),(38),(40),(41),(42),(43), +(44),(45),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46), +(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(48), +(48),(50),(51),(52),(52),(53),(54),(55),(57),(60),(61),(62),(62),(62),(62), +(62),(63),(64),(64),(65),(66),(66),(67),(68),(69),(70),(71),(72),(73),(74), +(75),(76),(77),(78),(79),(80),(80),(81),(82),(83),(84),(85),(86),(87),(88), +(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102), +(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114), +(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126), +(127),(128),(129),(130),(131),(132),(133),(133),(135),(135),(135),(135), +(136),(136),(138),(138),(139),(139),(139),(140),(141),(141),(142),(143), +(143),(145),(145); +commit; + + +-- enable_result_log +-- enable_query_log + +# check tables's value +select count(*) from bug30243_3 where org_id is not NULL; +select count(*) from bug30243_3 where org_id is NULL; + +select count(*) from bug30243_2 where org_id is not NULL; +select count(*) from bug30243_2 where org_id is NULL; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we over estimate the rows per +# unique value (since there are many NULLs). +# Skip this query log since the stats estimate could vary from runs +-- disable_query_log +-- disable_result_log +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; +-- enable_query_log +-- enable_result_log + +# following set operation will fail +#--error ER_WRONG_VALUE_FOR_VAR +--error 1231 +set global innodb_stats_method = "NULL"; + +set global innodb_stats_method = "nulls_ignored"; + +select @@innodb_stats_method; + +# Regenerate the stats with "nulls_ignored" option + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (should be approximately 1 row per value) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id ; + +select @@innodb_stats_method; + +# Try the "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; + +select @@innodb_stats_method; + +analyze table bug30243_1; +analyze table bug30243_2; +analyze table bug30243_3; + +# Following query plan shows that we get the correct rows per +# unique value (~1) +explain SELECT COUNT(*), 0 + FROM bug30243_1 orgs + LEFT JOIN bug30243_3 sa_opportunities + ON orgs.org_id=sa_opportunities.org_id + LEFT JOIN bug30243_2 contacts + ON orgs.org_id=contacts.org_id; + + +# Create a table with all NULL values, make sure the stats calculation +# does not crash with table of all NULL values +-- disable_query_log +CREATE TABLE table_bug30423 ( + org_id int(11) default NULL, + KEY(org_id) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO `table_bug30423` VALUES (NULL); + +begin; +let $i=10; +while ($i) +{ + INSERT INTO table_bug30423 SELECT NULL FROM table_bug30423; + dec $i; +} +commit; + +-- enable_query_log + +SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; + +# calculate the statistics for the table for "nulls_ignored" and +# "nulls_unequal" option +set global innodb_stats_method = "nulls_unequal"; +analyze table table_bug30423; + +set global innodb_stats_method = "nulls_ignored"; +analyze table table_bug30423; + + +eval set global innodb_stats_method = $innodb_stats_method_orig; + +drop table bug30243_2; + +drop table bug30243_1; + +drop table bug30243_3; + +drop table table_bug30423; diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index a7160d74a32..9f4babfaae6 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -66,6 +66,13 @@ this many index pages */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 +/* Estimated table level stats from sampled value. */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty) \ + ((value * (ib_longlong) index->stat_n_leaf_pages \ + + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size \ + + not_empty) \ + / (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size)) + /*********************************************************************** Marks all extern fields in a record as owned by the record. This function should be called if the delete mark of a record is removed: a not delete @@ -2834,10 +2841,55 @@ btr_estimate_n_rows_in_range( } } +/*********************************************************************** +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /* in: physical record */ + ulint n_unique, /* in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /* in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_longlong* n_not_null) /* in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; + + field = rec_get_nth_field(rec, offsets, i, &rec_len); + + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; + } + } +} + /*********************************************************************** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ void btr_estimate_number_of_different_key_vals( @@ -2851,6 +2903,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_longlong* n_diff; + ib_longlong* n_not_null; + ibool stats_null_not_equal; ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; @@ -2858,24 +2912,47 @@ btr_estimate_number_of_different_key_vals( ulint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_; - *offsets_next_rec_ - = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_; + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); - memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong)); + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } /* We sample some pages in the index to get an estimate */ for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) { - rec_t* supremum; mtr_start(&mtr); btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); @@ -2888,18 +2965,22 @@ btr_estimate_number_of_different_key_vals( page = btr_cur_get_page(&cursor); - supremum = page_get_supremum_rec(page); rec = page_rec_get_next(page_get_infimum_rec(page)); - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { + while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + if (page_rec_is_supremum(next_rec)) { break; } @@ -2911,7 +2992,8 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, + index, stats_null_not_equal, + &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -2921,6 +3003,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -2971,14 +3059,8 @@ btr_estimate_number_of_different_key_vals( included in index->stat_n_leaf_pages) */ for (j = 0; j <= n_cols; j++) { - index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_longlong)index->stat_n_leaf_pages - + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 - + total_external_size - + not_empty_flag) - / (BTR_KEY_VAL_ESTIMATE_N_PAGES - + total_external_size)); + index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then @@ -2997,12 +3079,20 @@ btr_estimate_number_of_different_key_vals( } index->stat_n_diff_key_vals[j] += add_on; + + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, + total_external_size, not_empty_flag); + } } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index fda6555e082..beea0a2f411 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -1358,6 +1358,12 @@ dict_index_add_to_cache( new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_longlong)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -3817,6 +3823,10 @@ dict_update_statistics_low( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4c52326a58a..6f58fd70fbd 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -130,6 +130,25 @@ static my_bool innobase_adaptive_hash_index = TRUE; static char* internal_innobase_data_file_path = NULL; +/* Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/* Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -6362,6 +6381,65 @@ ha_innobase::read_time( return(ranges + (double) rows / (double) total_rows * time_for_scan); } +/************************************************************************* +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + /* out: estimated record per key + value */ + dict_index_t* index, /* in: dict_index_t structure */ + ulint i, /* in: the column we are + calculating rec per key */ + ha_rows records) /* in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_longlong num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /************************************************************************* Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -6568,13 +6646,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -8990,6 +9063,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ AUTOINC_NO_LOCKING, 0); /* Maximum value */ +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -9031,6 +9111,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_on_metadata), MYSQL_SYSVAR(use_legacy_cardinality_algorithm), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(support_xa), MYSQL_SYSVAR(sync_spin_loops), diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 213dcb7f568..20235c55f22 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -404,7 +404,10 @@ btr_estimate_n_rows_in_range( /*********************************************************************** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ void btr_estimate_number_of_different_key_vals( diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 2f2a7441478..83dbf65ea41 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -222,6 +222,12 @@ struct dict_index_struct{ for this index, for each n-column prefix where n <= dict_get_n_unique(index); we periodically calculate new estimates */ + ib_longlong* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /* approximate index size in database pages */ ulint stat_n_leaf_pages; diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h index c6a6e5de4db..22a22d13e17 100644 --- a/storage/innobase/include/rem0cmp.h +++ b/storage/innobase/include/rem0cmp.h @@ -141,6 +141,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic index 52dc7ff5dc9..45e12301a3c 100644 --- a/storage/innobase/include/rem0cmp.ic +++ b/storage/innobase/include/rem0cmp.ic @@ -72,5 +72,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); + FALSE, &match_f, &match_b)); } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 3dd4bb961f9..811074b2be8 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -91,6 +91,11 @@ extern ulint srv_lock_table_size; extern ulint srv_n_file_io_threads; +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -286,6 +291,19 @@ of lower numbers are included. */ #define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward in connection with recovery */ +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + /************************************************************************* Boots Innobase server. */ diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c index ca0ec663548..2939c119e2e 100644 --- a/storage/innobase/rem/rem0cmp.c +++ b/storage/innobase/rem/rem0cmp.c @@ -720,6 +720,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -821,9 +825,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - goto next_field; - + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { + ret = -1; + } else { + goto next_field; + } } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 5b1184fb416..9c34e73109c 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -218,6 +218,11 @@ ulong srv_max_buf_pool_modified_pct = 90; /* variable counts amount of data read in total (in bytes) */ ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ ulint srv_data_written = 0; diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 8eb63fe8c78..43ffa762ddb 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,11 @@ +2011-01-14 The InnoDB Team + * btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc, + include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h, + include/rem0cmp.ic, include/srv0srv.h, rem/rem0cmp.c, + srv/srv0srv.c, innodb_bug30423.test: + Fix Bug#30423 InnoDBs treatment of NULL in index stats causes + bad "rows examined" estimates + 2011-01-06 The InnoDB Team * handler/i_s.cc, include/trx0i_s.h, trx/trx0i_s.c: Fix Bug#55397 cannot select from innodb_trx when trx_query contains diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index c57255a25ae..1fb0bc39933 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -100,6 +100,18 @@ can be released by page reorganize, then it is reorganized */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ + +/** Estimated table level stats from sampled value. +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ + (((value) * (ib_int64_t) index->stat_n_leaf_pages \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -3200,10 +3212,55 @@ btr_estimate_n_rows_in_range( } } +/*******************************************************************//** +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /*!< in: physical record */ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_int64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; + + field = rec_get_nth_field(rec, offsets, i, &rec_len); + + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; + } + } +} + /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -3217,6 +3274,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_int64_t* n_diff; + ib_int64_t* n_not_null; + ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; @@ -3225,16 +3284,43 @@ btr_estimate_number_of_different_key_vals( ullint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - rec_offs_init(offsets_rec_); - rec_offs_init(offsets_next_rec_); + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ @@ -3251,7 +3337,6 @@ btr_estimate_number_of_different_key_vals( /* We sample some pages in the index to get an estimate */ for (i = 0; i < n_sample_pages; i++) { - rec_t* supremum; mtr_start(&mtr); btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); @@ -3264,18 +3349,22 @@ btr_estimate_number_of_different_key_vals( page = btr_cur_get_page(&cursor); - supremum = page_get_supremum_rec(page); rec = page_rec_get_next(page_get_infimum_rec(page)); - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { + while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + if (page_rec_is_supremum(next_rec)) { break; } @@ -3287,7 +3376,8 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, + index, stats_null_not_equal, + &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -3297,6 +3387,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -3348,13 +3444,9 @@ btr_estimate_number_of_different_key_vals( for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_int64_t)index->stat_n_leaf_pages - + n_sample_pages - 1 - + total_external_size - + not_empty_flag) - / (n_sample_pages - + total_external_size)); + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * n_sample_pages + total_external_size, then @@ -3373,12 +3465,20 @@ btr_estimate_number_of_different_key_vals( } index->stat_n_diff_key_vals[j] += add_on; + + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); + } } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c index 67765555658..ff56e9cb76a 100644 --- a/storage/innodb_plugin/dict/dict0dict.c +++ b/storage/innodb_plugin/dict/dict0dict.c @@ -1669,6 +1669,12 @@ undo_size_ok: new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_int64_t)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -4291,6 +4297,10 @@ dict_update_statistics( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 86168e2bc9b..2d60c7397b0 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -174,6 +174,25 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +/** Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/** Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -7507,6 +7526,65 @@ innobase_get_mysql_key_number_for_index( return(0); } + +/*********************************************************************//** +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_int64_t num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -7737,13 +7815,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -10934,6 +11007,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, innodb_change_buffering_validate, innodb_change_buffering_update, "inserts"); +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -10988,6 +11068,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_on_metadata), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(strict_mode), diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index b477ad0320a..cb8cb399715 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -478,7 +478,10 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h index 19782c2e76a..09a068ccb93 100644 --- a/storage/innodb_plugin/include/dict0mem.h +++ b/storage/innodb_plugin/include/dict0mem.h @@ -321,6 +321,12 @@ struct dict_index_struct{ dict_get_n_unique(index); we periodically calculate new estimates */ + ib_int64_t* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /*!< approximate index size in database pages */ diff --git a/storage/innodb_plugin/include/rem0cmp.h b/storage/innodb_plugin/include/rem0cmp.h index 2f751a38864..a908521c9f7 100644 --- a/storage/innodb_plugin/include/rem0cmp.h +++ b/storage/innodb_plugin/include/rem0cmp.h @@ -165,6 +165,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current diff --git a/storage/innodb_plugin/include/rem0cmp.ic b/storage/innodb_plugin/include/rem0cmp.ic index 39ef5f4fba3..63415fe7837 100644 --- a/storage/innodb_plugin/include/rem0cmp.ic +++ b/storage/innodb_plugin/include/rem0cmp.ic @@ -87,5 +87,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); + FALSE, &match_f, &match_b)); } diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 7aa2ce74720..91ae895040c 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -154,6 +154,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ #define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -363,6 +368,19 @@ enum { in connection with recovery */ }; +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. */ enum srv_thread_type { diff --git a/storage/innodb_plugin/rem/rem0cmp.c b/storage/innodb_plugin/rem/rem0cmp.c index 35b67992558..04d2c15437b 100644 --- a/storage/innodb_plugin/rem/rem0cmp.c +++ b/storage/innodb_plugin/rem/rem0cmp.c @@ -862,6 +862,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -961,9 +965,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - goto next_field; - + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { + ret = -1; + } else { + goto next_field; + } } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index f7e7e351bdc..3cf17f33c40 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -243,6 +243,11 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ UNIV_INTERN ulint srv_data_written = 0;