From 0af84363b1ab5ca6fced83c8dc24de6251cced3e Mon Sep 17 00:00:00 2001 From: Joerg Bruehe Date: Mon, 16 Apr 2012 12:36:21 +0200 Subject: [PATCH 01/39] Raise version number after cloning for the 5.5.24 build. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0f96ceb238d..6111f6d453f 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=24 +MYSQL_VERSION_PATCH=25 MYSQL_VERSION_EXTRA= From b61d6be9fdfa61b7ae8e8436cf1748fc126ec15f Mon Sep 17 00:00:00 2001 From: Georgi Kodinov Date: Tue, 17 Apr 2012 13:25:41 +0300 Subject: [PATCH 02/39] Raise version number after cloning 5.1.63 --- configure.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.in b/configure.in index ccc2ac26570..84295a15f4a 100644 --- a/configure.in +++ b/configure.in @@ -12,7 +12,7 @@ dnl dnl When changing the major version number please also check the switch dnl statement in mysqlbinlog::check_master_version(). You may also need dnl to update version.c in ndb. -AC_INIT([MySQL Server], [5.1.63], [], [mysql]) +AC_INIT([MySQL Server], [5.1.64], [], [mysql]) AC_CONFIG_SRCDIR([sql/mysqld.cc]) AC_CANONICAL_SYSTEM From fe0400694171f0def529e35a77db128b835f5626 Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Tue, 17 Apr 2012 16:54:02 +0530 Subject: [PATCH 03/39] Bug #12902967 CREATING SELF REFERENCING FK ON SAME INDEX UNHANDLED, CONFUSING ERROR The main confusion with the error message is that "it implies that your data dictionary may now be out of sync". This patch will remove the unwanted and the misleading error message by not doing an unnecessary operation in the error handling code. 
rb://980 approved by: Dmitry Lenev --- .../suite/innodb/r/innodb_bug12902967.result | 6 +++ .../suite/innodb/t/innodb_bug12902967.test | 42 +++++++++++++++ sql/sql_table.cc | 53 +++++++++++++++---- 3 files changed, 91 insertions(+), 10 deletions(-) create mode 100644 mysql-test/suite/innodb/r/innodb_bug12902967.result create mode 100644 mysql-test/suite/innodb/t/innodb_bug12902967.test diff --git a/mysql-test/suite/innodb/r/innodb_bug12902967.result b/mysql-test/suite/innodb/r/innodb_bug12902967.result new file mode 100644 index 00000000000..b20710f08fb --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug12902967.result @@ -0,0 +1,6 @@ +create table t1 (f1 integer primary key) engine innodb; +alter table t1 add constraint c1 foreign key (f1) references t1(f1); +ERROR HY000: Error on rename of '#sql-temporary' to './test/t1' (errno: 150) +InnoDB: which are not compatible with the new table definition. +InnoDB: has or is referenced in foreign key constraints +drop table t1; diff --git a/mysql-test/suite/innodb/t/innodb_bug12902967.test b/mysql-test/suite/innodb/t/innodb_bug12902967.test new file mode 100644 index 00000000000..87e648c2726 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug12902967.test @@ -0,0 +1,42 @@ +# Bug 12902967: Creating self referencing fk on same index unhandled, +# confusing error +# +# Creating a self referencing foreign key on the same +# column/index is an unhandled exception, it should throw a sensible +# error but instead implies that your data dictionary may now be out +# of sync: + +--source include/have_innodb.inc + +let error_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; +--remove_file $error_log +--source include/restart_mysqld.inc + +create table t1 (f1 integer primary key) engine innodb; + +# The below statement should produce error message in error log. +# This error message should mention problem with foreign keys +# rather than with data dictionary. 
+--replace_regex /'\.\/test\/#sql-[0-9a-f_]*'/'#sql-temporary'/ +--error ER_ERROR_ON_RENAME +alter table t1 add constraint c1 foreign key (f1) references t1(f1); + +perl; + +$file = "$ENV{'error_log'}"; +open ( FILE, $file) || die "can't open file! ($file)\n"; +@lines = <FILE>; +close (FILE); +$count = 0; +foreach $line (reverse @lines) { + if ($line =~ "^InnoDB:") { + ++$count; + print "$line"; + if ($count == 2) { + break; + } + } +} +EOF + +drop table t1; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 0f4bac47cab..c146079fdb3 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -246,10 +246,17 @@ uint explain_filename(THD* thd, { part_name_len= tmp_p - part_name - 1; subpart_name= tmp_p + 3; + tmp_p+= 3; + } + else if ((tmp_p[1] == 'Q' || tmp_p[1] == 'q') && + (tmp_p[2] == 'L' || tmp_p[2] == 'l') && + tmp_p[3] == '-') + { + name_type= TEMP; + tmp_p+= 4; /* sql- prefix found */ } else res= 2; - tmp_p+= 3; break; case 'T': case 't': @@ -6718,21 +6725,47 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, (void) quick_rm_table(new_db_type, new_db, tmp_name, FN_IS_TMP); } else if (mysql_rename_table(new_db_type, new_db, tmp_name, new_db, - new_alias, FN_FROM_IS_TMP) || - ((new_name != table_name || new_db != db) && // we also do rename - (need_copy_table != ALTER_TABLE_METADATA_ONLY || - mysql_rename_table(save_old_db_type, db, table_name, new_db, - new_alias, NO_FRM_RENAME)) && - Table_triggers_list::change_table_name(thd, db, alias, table_name, - new_db, new_alias))) + new_alias, FN_FROM_IS_TMP)) { /* Try to get everything back. 
*/ - error=1; - (void) quick_rm_table(new_db_type,new_db,new_alias, 0); + error= 1; (void) quick_rm_table(new_db_type, new_db, tmp_name, FN_IS_TMP); (void) mysql_rename_table(old_db_type, db, old_name, db, alias, FN_FROM_IS_TMP); } + else if (new_name != table_name || new_db != db) + { + if (need_copy_table == ALTER_TABLE_METADATA_ONLY && + mysql_rename_table(save_old_db_type, db, table_name, new_db, + new_alias, NO_FRM_RENAME)) + { + /* Try to get everything back. */ + error= 1; + (void) quick_rm_table(new_db_type, new_db, new_alias, 0); + (void) mysql_rename_table(old_db_type, db, old_name, db, alias, + FN_FROM_IS_TMP); + } + else if (Table_triggers_list::change_table_name(thd, db, alias, + table_name, new_db, + new_alias)) + { + /* Try to get everything back. */ + error= 1; + (void) quick_rm_table(new_db_type, new_db, new_alias, 0); + (void) mysql_rename_table(old_db_type, db, old_name, db, + alias, FN_FROM_IS_TMP); + /* + If we were performing "fast"/in-place ALTER TABLE we also need + to restore old name of table in storage engine as a separate + step, as the above rename affects .FRM only. + */ + if (need_copy_table == ALTER_TABLE_METADATA_ONLY) + { + (void) mysql_rename_table(save_old_db_type, new_db, new_alias, + db, table_name, NO_FRM_RENAME); + } + } + } if (! error) (void) quick_rm_table(old_db_type, db, old_name, FN_IS_TMP); From 75ff20a2b4311e9235fba71544c256481fe7f24b Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Wed, 18 Apr 2012 09:21:23 +0530 Subject: [PATCH 04/39] Bug #12902967 Creating self referencing fk on same index unhandled, confusing error. I have added the not_embedded.inc in the test file. 
--- mysql-test/suite/innodb/t/innodb_bug12902967.test | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/suite/innodb/t/innodb_bug12902967.test b/mysql-test/suite/innodb/t/innodb_bug12902967.test index 87e648c2726..9101c8c705b 100644 --- a/mysql-test/suite/innodb/t/innodb_bug12902967.test +++ b/mysql-test/suite/innodb/t/innodb_bug12902967.test @@ -7,6 +7,7 @@ # of sync: --source include/have_innodb.inc +--source include/not_embedded.inc let error_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; --remove_file $error_log From 25f82f8a261963f633ccfdabce27dfc1e0309286 Mon Sep 17 00:00:00 2001 From: Chaithra Gopalareddy Date: Wed, 18 Apr 2012 11:25:01 +0530 Subject: [PATCH 05/39] Bug#12713907:STRANGE OPTIMIZE & WRONG RESULT UNDER ORDER BY COUNT(*) LIMIT. PROBLEM: With respect to problem in the bug description, we exhibit different behaviors for the two tables presented, because innodb statistics (rec_per_key in this case) are updated for the first table and not so for the second one. As a result the query plan gets changed in test_if_skip_sort_order to use 'index' scan. Hence the difference in the explain output. (NOTE: We can reproduce the problem with first table by reducing the number of tuples and changing the table structure) The varied output w.r.t the query on the second table is because of the result in the query plan change. When a query plan is changed to use 'index' scan, after the call to test_if_skip_sort_order, we set keyread to TRUE immediately. If for some reason we drop this index scan for a filesort later on, we fetch only the keys not the entire tuple. As a result we would see junk values in the result set. Following is the code flow: Call test_if_skip_sort_order -Choose an index to give sorted output -If this is a covering index, set_keyread to TRUE -Set the scan to INDEX scan Call test_if_skip_sort_order second time -Index is not chosen (note that we do not pass the actual limit value second time. 
Hence we do not choose index scan second time which in itself is a bug fixed in 5.6 with WL#5558) -goto filesort Call filesort -Create quick range on a different index -Since keyread is set to TRUE, we fetch only the columns of the index -results in the required columns are not fetched FIX: Remove the call to set_keyread(TRUE) from test_if_skip_sort_order. The access function which is 'join_read_first' or 'join_read_last' calls set_keyread anyways. mysql-test/r/func_group_innodb.result: Added test result for Bug#12713907 mysql-test/t/func_group_innodb.test: Added test case for Bug#12713907 sql/sql_select.cc: Remove the call to set_keyread as we do it from access functions 'join_read_first' and 'join_read_last' --- mysql-test/r/func_group_innodb.result | 40 ++++++++++++++++++++++++ mysql-test/t/func_group_innodb.test | 44 +++++++++++++++++++++++++++ sql/sql_select.cc | 2 -- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/func_group_innodb.result b/mysql-test/r/func_group_innodb.result index 908e85c1652..e68242a8191 100644 --- a/mysql-test/r/func_group_innodb.result +++ b/mysql-test/r/func_group_innodb.result @@ -145,3 +145,43 @@ select count(*), min(7), max(7) from t2m, t1i; count(*) min(7) max(7) 0 NULL NULL drop table t1m, t1i, t2m, t2i; +# +# Bug#12713907: STRANGE OPTIMIZE & WRONG RESULT UNDER ORDER BY +# COUNT(*) LIMIT. 
+# +CREATE TABLE t1 ( +id BIGINT(20) , +member_id_to INT(11) , +r_date DATE , +PRIMARY KEY (id,r_date), +KEY r_date_idx (r_date), +KEY t1_idx01 (member_id_to) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +(107924526,518491,'2011-05-01'), +(107924527,518491,'2011-05-01'), +(107924528,518491,'2011-05-01'), +(107924529,518491,'2011-05-01'), +(107924530,518491,'2011-05-01'), +(107924531,518491,'2011-05-01'), +(107924532,518491,'2011-05-01'), +(107924534,518491,'2011-06-21'), +(107924535,518491,'2011-06-21'), +(107924536,518491,'2011-06-21'), +(107924537,518491,'2011-06-21'), +(107924538,518491,'2011-06-21'), +(107924542,1601319,'2011-06-21'), +(107924543,1601319,'2011-06-21'), +(107924544,1601319,'2011-06-21'), +(107924545,1601319,'2011-06-21'), +(107924546,1601319,'2011-06-21'), +(107924547,1601319,'2011-06-21'), +(107924548,1601319,'2011-06-21'), +(107924549,1601319,'2011-06-21'), +(107924550,1601319,'2011-06-21'); +SELECT member_id_to, COUNT(*) FROM t1 WHERE r_date = +'2011-06-21' GROUP BY member_id_to ORDER BY 2 LIMIT 1; +member_id_to COUNT(*) +518491 5 +DROP TABLE t1; +# End of test BUG#12713907 diff --git a/mysql-test/t/func_group_innodb.test b/mysql-test/t/func_group_innodb.test index 1bdfd8f54bb..58f365bb244 100644 --- a/mysql-test/t/func_group_innodb.test +++ b/mysql-test/t/func_group_innodb.test @@ -83,3 +83,47 @@ explain select count(*), min(7), max(7) from t2m, t1i; select count(*), min(7), max(7) from t2m, t1i; drop table t1m, t1i, t2m, t2i; + +--echo # +--echo # Bug#12713907: STRANGE OPTIMIZE & WRONG RESULT UNDER ORDER BY +--echo # COUNT(*) LIMIT. 
+--echo # + +CREATE TABLE t1 ( +id BIGINT(20) , +member_id_to INT(11) , +r_date DATE , +PRIMARY KEY (id,r_date), +KEY r_date_idx (r_date), +KEY t1_idx01 (member_id_to) +) ENGINE=InnoDB; + +INSERT INTO t1 VALUES +(107924526,518491,'2011-05-01'), +(107924527,518491,'2011-05-01'), +(107924528,518491,'2011-05-01'), +(107924529,518491,'2011-05-01'), +(107924530,518491,'2011-05-01'), +(107924531,518491,'2011-05-01'), +(107924532,518491,'2011-05-01'), +(107924534,518491,'2011-06-21'), +(107924535,518491,'2011-06-21'), +(107924536,518491,'2011-06-21'), +(107924537,518491,'2011-06-21'), +(107924538,518491,'2011-06-21'), +(107924542,1601319,'2011-06-21'), +(107924543,1601319,'2011-06-21'), +(107924544,1601319,'2011-06-21'), +(107924545,1601319,'2011-06-21'), +(107924546,1601319,'2011-06-21'), +(107924547,1601319,'2011-06-21'), +(107924548,1601319,'2011-06-21'), +(107924549,1601319,'2011-06-21'), +(107924550,1601319,'2011-06-21'); + +SELECT member_id_to, COUNT(*) FROM t1 WHERE r_date = + '2011-06-21' GROUP BY member_id_to ORDER BY 2 LIMIT 1; + +DROP TABLE t1; + +--echo # End of test BUG#12713907 diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 5f1efabfc97..1c5f4f5a648 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -13820,8 +13820,6 @@ check_reverse_order: join_read_first:join_read_last; tab->type=JT_NEXT; // Read with index_first(), index_next() - if (table->covering_keys.is_set(best_key)) - table->set_keyread(TRUE); table->file->ha_index_or_rnd_end(); if (tab->join->select_options & SELECT_DESCRIBE) { From 448c3d627542c835ac7ea1851e8e019596e377fd Mon Sep 17 00:00:00 2001 From: Nuno Carvalho Date: Wed, 18 Apr 2012 10:08:01 +0100 Subject: [PATCH 06/39] WL#6236: Allow SHOW MASTER LOGS and SHOW BINARY LOGS with REPLICATION CLIENT Currently SHOW MASTER LOGS and SHOW BINARY LOGS require the SUPER privilege. 
Monitoring tools (such as MEM) often want to check this output - for instance MEM generates the SUM of the sizes of the logs reported here, and puts that in the Replication overview within the MEM Dashboard. However, because of the SUPER requirement, these tools often have an account that holds open the connection whilst monitoring, and can lock out administrators when the server gets overloaded and reaches max_connections - there is already another SUPER privileged account connected, the "monitor". As SHOW MASTER STATUS, and all other replication related statements, return with either REPLICATION CLIENT or SUPER privileges, this worklog is to make SHOW MASTER LOGS and SHOW BINARY LOGS be consistent with this as well, and allow both of these commands with either SUPER or REPLICATION CLIENT. This allows monitoring tools to not require a SUPER privilege any more, so is safer in overloaded situations, as well as being more secure, as lighter privileges can be given to users of such tools or scripts. 
--- mysql-test/suite/binlog/r/binlog_grant.result | 4 ++++ mysql-test/suite/binlog/t/binlog_grant.test | 19 +++++++++++++++++++ sql/sql_parse.cc | 2 +- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/binlog/r/binlog_grant.result b/mysql-test/suite/binlog/r/binlog_grant.result index 21ebb891103..38442349d3b 100644 --- a/mysql-test/suite/binlog/r/binlog_grant.result +++ b/mysql-test/suite/binlog/r/binlog_grant.result @@ -26,3 +26,7 @@ ERROR 42000: Access denied; you need the SUPER privilege for this operation **** Clean up **** set global binlog_format = @saved_binlog_format; drop user mysqltest_1@localhost; +GRANT REPLICATION CLIENT ON *.* TO 'mysqltest_1'@'localhost'; +SHOW MASTER LOGS; +SHOW BINARY LOGS; +DROP USER 'mysqltest_1'@'localhost'; diff --git a/mysql-test/suite/binlog/t/binlog_grant.test b/mysql-test/suite/binlog/t/binlog_grant.test index d36dcce4cc3..bbf57b075af 100644 --- a/mysql-test/suite/binlog/t/binlog_grant.test +++ b/mysql-test/suite/binlog/t/binlog_grant.test @@ -58,3 +58,22 @@ disconnect root; connection default; set global binlog_format = @saved_binlog_format; drop user mysqltest_1@localhost; + + +# Testing if REPLICATION CLIENT privilege is enough to execute +# SHOW MASTER LOGS and SHOW BINARY. +GRANT REPLICATION CLIENT ON *.* TO 'mysqltest_1'@'localhost'; +--connect(rpl,localhost,mysqltest_1,,) + +--connection rpl +# We are only interested if the following commands succeed and not on +# their output. 
+--disable_result_log +SHOW MASTER LOGS; +SHOW BINARY LOGS; +--enable_result_log + +# clean up +--disconnect rpl +connection default; +DROP USER 'mysqltest_1'@'localhost'; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 0f190809ab9..14b778328d2 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -3057,7 +3057,7 @@ end_with_restore_list: goto error; #else { - if (check_global_access(thd, SUPER_ACL)) + if (check_global_access(thd, SUPER_ACL | REPL_CLIENT_ACL)) goto error; res = show_binlogs(thd); break; From b73da023bfc173b404f5298d6b8b36cc8cfc493d Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Wed, 18 Apr 2012 15:16:11 +0530 Subject: [PATCH 07/39] Bug #12902967 Creating self referencing fk on same index unhandled, confusing error. Updated the test script to work properly on windows platform. --- mysql-test/suite/innodb/t/innodb_bug12902967.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mysql-test/suite/innodb/t/innodb_bug12902967.test b/mysql-test/suite/innodb/t/innodb_bug12902967.test index 9101c8c705b..7bc5727a7a6 100644 --- a/mysql-test/suite/innodb/t/innodb_bug12902967.test +++ b/mysql-test/suite/innodb/t/innodb_bug12902967.test @@ -10,7 +10,6 @@ --source include/not_embedded.inc let error_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; ---remove_file $error_log --source include/restart_mysqld.inc create table t1 (f1 integer primary key) engine innodb; @@ -21,7 +20,7 @@ create table t1 (f1 integer primary key) engine innodb; --replace_regex /'\.\/test\/#sql-[0-9a-f_]*'/'#sql-temporary'/ --error ER_ERROR_ON_RENAME alter table t1 add constraint c1 foreign key (f1) references t1(f1); - +--source include/restart_mysqld.inc perl; $file = "$ENV{'error_log'}"; From 11b2cf4f03b7c4b84c26d3c95cdf6f8fa6322349 Mon Sep 17 00:00:00 2001 From: Tor Didriksen Date: Wed, 18 Apr 2012 13:14:05 +0200 Subject: [PATCH 08/39] Backport 5.5=>5.1 Patch for Bug#13805127: Stored program cache produces wrong result in same THD. 
--- mysql-test/r/ps.result | 103 ++++++++++++++++++++++++ mysql-test/r/sp.result | 38 +++++++++ mysql-test/t/ps.test | 90 +++++++++++++++++++++ mysql-test/t/sp.test | 37 +++++++++ sql/mem_root_array.h | 175 +++++++++++++++++++++++++++++++++++++++++ sql/sql_lex.cc | 22 ++++++ sql/sql_lex.h | 16 +++- sql/sql_prepare.cc | 8 ++ 8 files changed, 487 insertions(+), 2 deletions(-) create mode 100644 sql/mem_root_array.h diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result index 84c64a3905a..60370e4b708 100644 --- a/mysql-test/r/ps.result +++ b/mysql-test/r/ps.result @@ -3040,3 +3040,106 @@ id select_type table type possible_keys key key_len ref rows Extra DEALLOCATE PREPARE stmt; DROP TABLE t1; End of 5.1 tests. + +# Bug#13805127: Stored program cache produces wrong result in same THD + +PREPARE s1 FROM +" +SELECT c1, t2.c2, count(c3) +FROM + ( + SELECT 3 as c2 FROM dual WHERE @x = 1 + UNION + SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2 +"; + +SET @x = 1; +SELECT c1, t2.c2, count(c3) +FROM +( +SELECT 3 as c2 FROM dual WHERE @x = 1 +UNION +SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 +) AS t1, +( +SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual +UNION +SELECT '2012-03-01 02:00:00', 3, 2 FROM dual +UNION +SELECT '2012-03-01 01:00:00', 2, 1 FROM dual +) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 + +EXECUTE s1; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 + +SET @x = 2; +SELECT c1, t2.c2, count(c3) +FROM +( +SELECT 3 as c2 FROM dual WHERE @x = 1 +UNION +SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 +) AS t1, +( +SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual +UNION +SELECT 
'2012-03-01 02:00:00', 3, 2 FROM dual +UNION +SELECT '2012-03-01 01:00:00', 2, 1 FROM dual +) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 + +EXECUTE s1; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 + +SET @x = 1; +SELECT c1, t2.c2, count(c3) +FROM +( +SELECT 3 as c2 FROM dual WHERE @x = 1 +UNION +SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 +) AS t1, +( +SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual +UNION +SELECT '2012-03-01 02:00:00', 3, 2 FROM dual +UNION +SELECT '2012-03-01 01:00:00', 2, 1 FROM dual +) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 + +EXECUTE s1; +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 +DEALLOCATE PREPARE s1; diff --git a/mysql-test/r/sp.result b/mysql-test/r/sp.result index 11d6ff02756..0d0d76e609b 100644 --- a/mysql-test/r/sp.result +++ b/mysql-test/r/sp.result @@ -7110,3 +7110,41 @@ DROP FUNCTION f1; # ------------------------------------------------------------------ # -- End of 5.1 tests # ------------------------------------------------------------------ + +# Bug#13805127: Stored program cache produces wrong result in same THD + +CREATE PROCEDURE p1(x INT UNSIGNED) +BEGIN +SELECT c1, t2.c2, count(c3) +FROM +( +SELECT 3 as c2 FROM dual WHERE x = 1 +UNION +SELECT 2 FROM dual WHERE x = 1 OR x = 2 +) AS t1, +( +SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual +UNION +SELECT '2012-03-01 02:00:00', 3, 2 FROM dual +UNION +SELECT '2012-03-01 01:00:00', 2, 1 FROM dual +) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2 +; +END| + +CALL p1(1); +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 +CALL p1(2); +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +CALL p1(1); +c1 c2 count(c3) +2012-03-01 01:00:00 2 1 +2012-03-01 01:00:00 3 1 +2012-03-01 02:00:00 3 1 +DROP PROCEDURE p1; diff --git a/mysql-test/t/ps.test 
b/mysql-test/t/ps.test index 9b3f3e750e1..88bfe1dd2be 100644 --- a/mysql-test/t/ps.test +++ b/mysql-test/t/ps.test @@ -3102,3 +3102,93 @@ DEALLOCATE PREPARE stmt; DROP TABLE t1; --echo End of 5.1 tests. + +--echo +--echo # Bug#13805127: Stored program cache produces wrong result in same THD +--echo + +PREPARE s1 FROM +" +SELECT c1, t2.c2, count(c3) +FROM + ( + SELECT 3 as c2 FROM dual WHERE @x = 1 + UNION + SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2 +"; + +--echo +SET @x = 1; +SELECT c1, t2.c2, count(c3) +FROM + ( + SELECT 3 as c2 FROM dual WHERE @x = 1 + UNION + SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +--echo +EXECUTE s1; + +--echo +SET @x = 2; +SELECT c1, t2.c2, count(c3) +FROM + ( + SELECT 3 as c2 FROM dual WHERE @x = 1 + UNION + SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +--echo +EXECUTE s1; + +--echo +SET @x = 1; +SELECT c1, t2.c2, count(c3) +FROM + ( + SELECT 3 as c2 FROM dual WHERE @x = 1 + UNION + SELECT 2 FROM dual WHERE @x = 1 OR @x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 +WHERE t2.c2 = t1.c2 +GROUP BY c1, c2; +--echo +EXECUTE s1; + +DEALLOCATE PREPARE s1; + diff --git a/mysql-test/t/sp.test 
b/mysql-test/t/sp.test index ae4e1dd588e..75d290f7c8b 100644 --- a/mysql-test/t/sp.test +++ b/mysql-test/t/sp.test @@ -8429,3 +8429,40 @@ DROP FUNCTION f1; --echo # ------------------------------------------------------------------ --echo # -- End of 5.1 tests --echo # ------------------------------------------------------------------ + +--echo +--echo # Bug#13805127: Stored program cache produces wrong result in same THD +--echo + +delimiter |; + +CREATE PROCEDURE p1(x INT UNSIGNED) +BEGIN + SELECT c1, t2.c2, count(c3) + FROM + ( + SELECT 3 as c2 FROM dual WHERE x = 1 + UNION + SELECT 2 FROM dual WHERE x = 1 OR x = 2 + ) AS t1, + ( + SELECT '2012-03-01 01:00:00' AS c1, 3 as c2, 1 as c3 FROM dual + UNION + SELECT '2012-03-01 02:00:00', 3, 2 FROM dual + UNION + SELECT '2012-03-01 01:00:00', 2, 1 FROM dual + ) AS t2 + WHERE t2.c2 = t1.c2 + GROUP BY c1, c2 + ; +END| + +delimiter ;| + +--echo +CALL p1(1); +CALL p1(2); +CALL p1(1); + +DROP PROCEDURE p1; + diff --git a/sql/mem_root_array.h b/sql/mem_root_array.h new file mode 100644 index 00000000000..5ce4dcb584d --- /dev/null +++ b/sql/mem_root_array.h @@ -0,0 +1,175 @@ +/* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifndef MEM_ROOT_ARRAY_INCLUDED +#define MEM_ROOT_ARRAY_INCLUDED + +#include <my_alloc.h> + +/** + A typesafe replacement for DYNAMIC_ARRAY. 
+ We use MEM_ROOT for allocating storage, rather than the C++ heap. + The interface is chosen to be similar to std::vector. + + @remark + Unlike DYNAMIC_ARRAY, elements are properly copied + (rather than memcpy()d) if the underlying array needs to be expanded. + + @remark + Depending on has_trivial_destructor, we destroy objects which are + removed from the array (including when the array object itself is destroyed). + + @remark + Note that MEM_ROOT has no facility for reusing free space, + so don't use this if multiple re-expansions are likely to happen. + + @param Element_type The type of the elements of the container. + Elements must be copyable. + @param has_trivial_destructor If true, we don't destroy elements. + We could have used type traits to determine this. + __has_trivial_destructor is supported by some (but not all) + compilers we use. +*/ +template<typename Element_type, bool has_trivial_destructor> +class Mem_root_array +{ +public: + Mem_root_array(MEM_ROOT *root) + : m_root(root), m_array(NULL), m_size(0), m_capacity(0) + { + DBUG_ASSERT(m_root != NULL); + } + + ~Mem_root_array() + { + clear(); + } + + Element_type &at(size_t n) + { + DBUG_ASSERT(n < size()); + return m_array[n]; + } + + const Element_type &at(size_t n) const + { + DBUG_ASSERT(n < size()); + return m_array[n]; + } + + // Returns a pointer to the first element in the array. + Element_type *begin() { return &m_array[0]; } + + // Returns a pointer to the past-the-end element in the array. + Element_type *end() { return &m_array[size()]; } + + // Erases all of the elements. + void clear() + { + if (!empty()) + chop(0); + } + + /* + Chops the tail off the array, erasing all tail elements. + @param pos Index of first element to erase. + */ + void chop(const size_t pos) + { + DBUG_ASSERT(pos < m_size); + if (!has_trivial_destructor) + { + for (size_t ix= pos; ix < m_size; ++ix) + { + Element_type *p= &m_array[ix]; + p->~Element_type(); // Destroy discarded element. + } + } + m_size= pos; + } + + /* + Reserves space for array elements. 
+ Copies over existing elements, in case we are re-expanding the array. + + @param n number of elements. + @retval true if out-of-memory, false otherwise. + */ + bool reserve(size_t n) + { + if (n <= m_capacity) + return false; + + void *mem= alloc_root(m_root, n * element_size()); + if (!mem) + return true; + Element_type *array= static_cast<Element_type*>(mem); + + // Copy all the existing elements into the new array. + for (size_t ix= 0; ix < m_size; ++ix) + { + Element_type *new_p= &array[ix]; + Element_type *old_p= &m_array[ix]; + new (new_p) Element_type(*old_p); // Copy into new location. + if (!has_trivial_destructor) + old_p->~Element_type(); // Destroy the old element. + } + + // Forget the old array. + m_array= array; + m_capacity= n; + return false; + } + + /* + Adds a new element at the end of the array, after its current last + element. The content of this new element is initialized to a copy of + the input argument. + + @param element Object to copy. + @retval true if out-of-memory, false otherwise. + */ + bool push_back(const Element_type &element) + { + const size_t min_capacity= 20; + const size_t expansion_factor= 2; + if (0 == m_capacity && reserve(min_capacity)) + return true; + if (m_size == m_capacity && reserve(m_capacity * expansion_factor)) + return true; + Element_type *p= &m_array[m_size++]; + new (p) Element_type(element); + return false; + } + + size_t capacity() const { return m_capacity; } + size_t element_size() const { return sizeof(Element_type); } + bool empty() const { return size() == 0; } + size_t size() const { return m_size; } + +private: + MEM_ROOT *const m_root; + Element_type *m_array; + size_t m_size; + size_t m_capacity; + + // Not (yet) implemented. 
+ Mem_root_array(const Mem_root_array&); + Mem_root_array &operator=(const Mem_root_array&); +}; + + +#endif // MEM_ROOT_ARRAY_INCLUDED diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 4a0553ad59b..fd9367b99f2 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -305,6 +305,8 @@ void lex_start(THD *thd) lex->select_lex.sql_cache= SELECT_LEX::SQL_CACHE_UNSPECIFIED; lex->select_lex.init_order(); lex->select_lex.group_list.empty(); + if (lex->select_lex.group_list_ptrs) + lex->select_lex.group_list_ptrs->clear(); lex->describe= 0; lex->subqueries= FALSE; lex->context_analysis_only= 0; @@ -1632,6 +1634,8 @@ void st_select_lex::init_select() { st_select_lex_node::init_select(); group_list.empty(); + if (group_list_ptrs) + group_list_ptrs->clear(); type= db= 0; having= 0; table_join_options= 0; @@ -2901,6 +2905,8 @@ static void fix_prepare_info_in_table_list(THD *thd, TABLE_LIST *tbl) The passed WHERE and HAVING are to be saved for the future executions. This function saves it, and returns a copy which can be thrashed during this execution of the statement. By saving/thrashing here we mean only + We also save the chain of ORDER::next in group_list, in case + the list is modified by remove_const(). AND/OR trees. The function also calls fix_prepare_info_in_table_list that saves all ON expressions. 
@@ -2912,6 +2918,19 @@ void st_select_lex::fix_prepare_information(THD *thd, Item **conds, if (!thd->stmt_arena->is_conventional() && first_execution) { first_execution= 0; + if (group_list.first) + { + if (!group_list_ptrs) + { + void *mem= thd->stmt_arena->alloc(sizeof(Group_list_ptrs)); + group_list_ptrs= new (mem) Group_list_ptrs(thd->stmt_arena->mem_root); + } + group_list_ptrs->reserve(group_list.elements); + for (ORDER *order= group_list.first; order; order= order->next) + { + group_list_ptrs->push_back(order); + } + } if (*conds) { prep_where= *conds; @@ -3016,3 +3035,6 @@ bool st_lex::is_partition_management() const alter_info.flags == ALTER_REORGANIZE_PARTITION)); } +#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION +template class Mem_root_array; +#endif diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 035fa1fde91..d512190eecc 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -40,6 +40,7 @@ class Event_parse_data; */ #include "set_var.h" +#include "mem_root_array.h" #ifdef MYSQL_YACC #define LEX_YYSTYPE void * @@ -183,6 +184,7 @@ enum enum_drop_mode }; typedef List List_item; +typedef Mem_root_array Group_list_ptrs; /* SERVERS CACHE CHANGES */ typedef struct st_lex_server_options @@ -590,7 +592,16 @@ public: enum olap_type olap; /* FROM clause - points to the beginning of the TABLE_LIST::next_local list. */ SQL_I_List table_list; - SQL_I_List group_list; /* GROUP BY clause. */ + + /* + GROUP BY clause. + This list may be mutated during optimization (by remove_const()), + so for prepared statements, we keep a copy of the ORDER.next pointers in + group_list_ptrs, and re-establish the original list before each execution. 
+ */ + SQL_I_List group_list; + Group_list_ptrs *group_list_ptrs; + List item_list; /* list of fields & expressions */ List interval_list; bool is_item_list_lookup; @@ -779,7 +790,8 @@ public: bool test_limit(); friend void lex_start(THD *thd); - st_select_lex() : n_sum_items(0), n_child_sum_items(0) {} + st_select_lex() : group_list_ptrs(NULL), n_sum_items(0), n_child_sum_items(0) + {} void make_empty_select() { init_query(); diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 98379dba9ba..27e70aaf843 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -2361,6 +2361,14 @@ void reinit_stmt_before_use(THD *thd, LEX *lex) DBUG_ASSERT(sl->join == 0); ORDER *order; /* Fix GROUP list */ + if (sl->group_list_ptrs && sl->group_list_ptrs->size() > 0) + { + for (uint ix= 0; ix < sl->group_list_ptrs->size() - 1; ++ix) + { + order= sl->group_list_ptrs->at(ix); + order->next= sl->group_list_ptrs->at(ix+1); + } + } for (order= sl->group_list.first; order; order= order->next) order->item= &order->item_ptr; /* Fix ORDER list */ From 892106db9219f2714a65bd0d07da262c5aaa8fca Mon Sep 17 00:00:00 2001 From: Tor Didriksen Date: Wed, 18 Apr 2012 14:13:13 +0200 Subject: [PATCH 09/39] new header file must be listed in Makefile.am --- sql/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/Makefile.am b/sql/Makefile.am index f8ba25377cd..2336faf4e31 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -71,6 +71,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ tztime.h my_decimal.h\ sp_head.h sp_pcontext.h sp_rcontext.h sp.h sp_cache.h \ parse_file.h sql_view.h sql_trigger.h \ + mem_root_array.h \ sql_array.h sql_cursor.h events.h scheduler.h \ event_db_repository.h event_queue.h \ sql_plugin.h authors.h event_parse_data.h \ From bf4161adae1a0b103d5424f2fe3ec8c167311997 Mon Sep 17 00:00:00 2001 From: Mayank Prasad Date: Thu, 19 Apr 2012 14:57:34 +0530 Subject: [PATCH 10/39] BUG#12427262 : 60961: SHOW TABLES VERY SLOW WHEN NOT IN SYSTEM DISK 
CACHE Reason: This is a regression happened because of changes done in code refactoring in 5.1 from 5.0. Issue: While doing "Show tables" lex->verbose was being checked to avoid opening FRM files to get table type. In case of "Show full table", lex->verbose is true to indicate table type is required. In 5.0, this check was present which got missing in >=5.5. Fix: Added the required check to avoid opening FRM files unnecessarily in case of "Show tables". --- sql/sql_show.cc | 68 +++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/sql/sql_show.cc b/sql/sql_show.cc index ab3217dbe48..1b0f94ce18e 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -3198,39 +3198,44 @@ end: static int fill_schema_table_names(THD *thd, TABLE *table, LEX_STRING *db_name, LEX_STRING *table_name, - bool with_i_schema) + bool with_i_schema, + bool need_table_type) { - if (with_i_schema) + /* Avoid opening FRM files if table type is not needed. */ + if (need_table_type) { - table->field[3]->store(STRING_WITH_LEN("SYSTEM VIEW"), - system_charset_info); - } - else - { - enum legacy_db_type not_used; - char path[FN_REFLEN + 1]; - (void) build_table_filename(path, sizeof(path) - 1, db_name->str, - table_name->str, reg_ext, 0); - switch (mysql_frm_type(thd, path, &not_used)) { - case FRMTYPE_ERROR: - table->field[3]->store(STRING_WITH_LEN("ERROR"), - system_charset_info); - break; - case FRMTYPE_TABLE: - table->field[3]->store(STRING_WITH_LEN("BASE TABLE"), - system_charset_info); - break; - case FRMTYPE_VIEW: - table->field[3]->store(STRING_WITH_LEN("VIEW"), - system_charset_info); - break; - default: - DBUG_ASSERT(0); - } - if (thd->is_error() && thd->main_da.sql_errno() == ER_NO_SUCH_TABLE) + if (with_i_schema) { - thd->clear_error(); - return 0; + table->field[3]->store(STRING_WITH_LEN("SYSTEM VIEW"), + system_charset_info); + } + else + { + enum legacy_db_type not_used; + char path[FN_REFLEN + 1]; + (void) build_table_filename(path,
sizeof(path) - 1, db_name->str, + table_name->str, reg_ext, 0); + switch (mysql_frm_type(thd, path, &not_used)) { + case FRMTYPE_ERROR: + table->field[3]->store(STRING_WITH_LEN("ERROR"), + system_charset_info); + break; + case FRMTYPE_TABLE: + table->field[3]->store(STRING_WITH_LEN("BASE TABLE"), + system_charset_info); + break; + case FRMTYPE_VIEW: + table->field[3]->store(STRING_WITH_LEN("VIEW"), + system_charset_info); + break; + default: + DBUG_ASSERT(0); + } + if (thd->is_error() && thd->main_da.sql_errno() == ER_NO_SUCH_TABLE) + { + thd->clear_error(); + return 0; + } } } if (schema_table_store_record(thd, table)) @@ -3551,7 +3556,8 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) if (schema_table_idx == SCH_TABLE_NAMES) { if (fill_schema_table_names(thd, tables->table, db_name, - table_name, with_i_schema)) + table_name, with_i_schema, + lex->verbose)) continue; } else From 49e484c8cd2e362e843bbd5d756422cc7e2686d3 Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Fri, 20 Apr 2012 19:41:20 +0300 Subject: [PATCH 11/39] BUG#11754117 incorrect logging of INSERT into auto-increment BUG#11761686 insert_id event is not filtered. Two issues are covered. INSERT into autoincrement field which is not the first part in the composed primary key is unsafe by autoincrement logging design. The case is specific to MyISAM engine because Innodb does not allow such table definition. However no warnings and row-format logging in the MIXED mode was done, and that is fixed. Int-, Rand-, User-var log-events were not filtered along with their parent query that made possible them to screw up execution context of the following query. Fixed with deferring their execution until the parent query. ****** Bug#11754117 Post review fixes. mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result: a new result file is added. mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result: results updated.
mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test: regression test for BUG#11754117-45670 is added. mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test: regression test for filtering issue of BUG#11754117 - 45670 is added. sql/log_event.cc: Logic is added for deferring and executing events associated with the Query event. sql/log_event.h: Interface to deferred events batch execution is added. sql/rpl_rli.cc: initialization for new RLI members is added. sql/rpl_rli.h: New members to RLI are added to facilitate deferred events gathering and execution control; two general character RLI cleanup methods are constructed. sql/rpl_utility.cc: Deferred_log_events methods are defined. sql/rpl_utility.h: A new class Deferred_log_events is defined to implement IRU events gathering, execution and cleanup. sql/slave.cc: Necessary changes to initialize `rli->deferred_events' and prevent deferred event deletion in the main read-exec branch. sql/sql_base.cc: A new safe-check function for multi-part pk with auto-increment is defined and deployed in lock_tables(). sql/sql_class.cc: Initialization for a new member and replication cleanups are added to THD class. sql/sql_class.h: THD class receives a new member to hold a specific execution context for slave applier. sql/sql_parse.cc: Execution of the deferred event is started prior to its parent query.
--- .../rpl/r/rpl_auto_increment_bug45679.result | 36 +++++++++++ .../rpl/r/rpl_filter_tables_not_exist.result | 21 +++++++ .../rpl/t/rpl_auto_increment_bug45679.test | 62 +++++++++++++++++++ .../rpl/t/rpl_filter_tables_not_exist.test | 60 ++++++++++++++++++ sql/log_event.cc | 36 ++++++++++- sql/log_event.h | 10 +++ sql/rpl_rli.cc | 4 +- sql/rpl_rli.h | 35 +++++++++++ sql/rpl_utility.cc | 61 ++++++++++++++++++ sql/rpl_utility.h | 18 ++++++ sql/slave.cc | 9 ++- sql/sql_base.cc | 34 ++++++++++ sql/sql_class.cc | 8 ++- sql/sql_class.h | 2 + sql/sql_parse.cc | 5 ++ 15 files changed, 397 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result create mode 100644 mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test diff --git a/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result new file mode 100644 index 00000000000..75f4e661dc1 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result @@ -0,0 +1,36 @@ +include/master-slave.inc +[connection master] +call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); +create table tm (b int auto_increment, a int, primary key (a,b)) engine= myisam; +create table ti (b int auto_increment, a int, primary key (a,b)) engine= innodb; +ERROR 42000: Incorrect table definition; there can be only one auto column and it must be defined as a key +create table ti (b int auto_increment, a int, primary key (b,a)) engine= innodb; +set @@binlog_format=statement; +*** autoincrement field is not the first in PK warning must be there: *** +insert into tm set b=null, a=1; +Warnings: +Note 1592 Statement may not be safe to log in statement format. +show warnings; +Level Code Message +Note 1592 Statement may not be safe to log in statement format. 
+*** no warning when autoincrement is the first in PK +insert into ti set b=null, a=1; +show warnings; +Level Code Message +create function multi_part_pk_with_autoinc (arg int) +returns int +begin +insert into tm set b=null, a=arg; +return arg; +end// +select multi_part_pk_with_autoinc (3); +multi_part_pk_with_autoinc (3) +3 +*** autoincrement field is not the first in PK warning must be there: *** +show warnings; +Level Code Message +set @@binlog_format=mixed; +insert into tm set b=null, a=2; +drop table tm, ti; +drop function multi_part_pk_with_autoinc; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result b/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result index c88dcee9dbc..f0dc97b71f2 100644 --- a/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result +++ b/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result @@ -114,4 +114,25 @@ id c 3 3 [on master] drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +CREATE TABLE test.t5 (a INT AUTO_INCREMENT PRIMARY KEY, b INT, c INT); +CREATE TABLE test.t1 (a INT); +INSERT INTO test.t1 VALUES(1); +call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +CREATE TABLE test.t_slave (a INT AUTO_INCREMENT PRIMARY KEY, b INT, c INT); +CREATE TRIGGER t1_update AFTER UPDATE ON test.t1 FOR EACH ROW +INSERT INTO test.t_slave VALUES(NULL, ROUND(RAND() * 1000), @c); +SET INSERT_ID=2; +SET @c=2; +SET @@rand_seed1=10000000, @@rand_seed2=1000000; +INSERT INTO t5 VALUES (NULL, ROUND(RAND() * 1000), @c); +Warnings: +Note 1592 Statement may not be safe to log in statement format. 
+SELECT b into @b FROM test.t5; +UPDATE test.t1 SET a=2; +SELECT a AS 'ONE' into @a FROM test.t_slave; +SELECT c AS 'NULL' into @c FROM test.t_slave; +SELECT b into @b FROM test.t_slave; +drop table test.t5; +drop table test.t1; +drop table test.t_slave; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test b/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test new file mode 100644 index 00000000000..6996e1c73c7 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test @@ -0,0 +1,62 @@ +# Test of auto-increment. +# +# BUG#11754117-45670 +# Multipart primary key with the autoincrement part not first in it +# is replication unsafe. +# + +source include/master-slave.inc; +source include/have_binlog_format_mixed.inc; +source include/have_innodb.inc; + +call mtr.add_suppression('Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.'); + +--connection master +create table tm (b int auto_increment, a int, primary key (a,b)) engine= myisam; +--error ER_WRONG_AUTO_KEY +create table ti (b int auto_increment, a int, primary key (a,b)) engine= innodb; +create table ti (b int auto_increment, a int, primary key (b,a)) engine= innodb; + +set @@binlog_format=statement; +--echo *** autoincrement field is not the first in PK warning must be there: *** +insert into tm set b=null, a=1; +show warnings; +--echo *** no warning when autoincrement is the first in PK +insert into ti set b=null, a=1; +show warnings; + +delimiter //; +create function multi_part_pk_with_autoinc (arg int) +returns int +begin + insert into tm set b=null, a=arg; + return arg; +end// +delimiter ;// + +select multi_part_pk_with_autoinc (3); +--echo *** autoincrement field is not the first in PK warning must be there: *** +show warnings; + +set @@binlog_format=mixed; +insert into tm set b=null, a=2; + +sync_slave_with_master; + +if (`select count(*) <> 3 from tm`) +{ + --echo Wrong result from SELECT on the slave 
side. + select * from tm; + --die +} + +# cleanup + +--connection master + +drop table tm, ti; +drop function multi_part_pk_with_autoinc; + +sync_slave_with_master; + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test b/mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test index 13c66f9f64b..b62a6e96437 100644 --- a/mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test +++ b/mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test @@ -206,4 +206,64 @@ SELECT * FROM t3; connection master; echo [on master]; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; + +--sync_slave_with_master + +# +# BUG#11754117 - 45670: INTVAR_EVENTS FOR FILTERED-OUT QUERY_LOG_EVENTS ARE EXECUTED +# Int-, Rand- and User- var events accompaning a filtered out Query-log-event should +# be filtered as well. +# +connection master; +CREATE TABLE test.t5 (a INT AUTO_INCREMENT PRIMARY KEY, b INT, c INT); # ignored on slave +CREATE TABLE test.t1 (a INT); # accepted on slave +INSERT INTO test.t1 VALUES(1); + +--sync_slave_with_master +call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +CREATE TABLE test.t_slave (a INT AUTO_INCREMENT PRIMARY KEY, b INT, c INT); +CREATE TRIGGER t1_update AFTER UPDATE ON test.t1 FOR EACH ROW + INSERT INTO test.t_slave VALUES(NULL, ROUND(RAND() * 1000), @c); + +connection master; +SET INSERT_ID=2; +SET @c=2; +SET @@rand_seed1=10000000, @@rand_seed2=1000000; +INSERT INTO t5 VALUES (NULL, ROUND(RAND() * 1000), @c); # to be ignored +SELECT b into @b FROM test.t5; +--let $b_master=`select @b` +UPDATE test.t1 SET a=2; # to run trigger on slave + +--sync_slave_with_master + +# The proof: +SELECT a AS 'ONE' into @a FROM test.t_slave; +SELECT c AS 'NULL' into @c FROM test.t_slave; + +let $count= 1; +let $table= test.t_slave; +source include/wait_until_rows_count.inc; + +if (`SELECT @a != 2 and @c != NULL`) +{ + SELECT * FROM test.t_slave; + --die Intvar 
or user var from replication events unexpetedly escaped out to screw a following query applying context. +} + +SELECT b into @b FROM test.t_slave; +--let $b_slave=`select @b` + +if (`SELECT $b_slave = $b_master`) +{ + --echo Might be pure coincidence of two randoms from master and slave table. Don not panic yet. +} + +# cleanup BUG#11754117 +connection master; +drop table test.t5; +drop table test.t1; + +--sync_slave_with_master +drop table test.t_slave; + --source include/rpl_end.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index 427f27650dd..8606964113c 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -5257,11 +5257,12 @@ void Intvar_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #endif +#if defined(HAVE_REPLICATION)&& !defined(MYSQL_CLIENT) + /* Intvar_log_event::do_apply_event() */ -#if defined(HAVE_REPLICATION)&& !defined(MYSQL_CLIENT) int Intvar_log_event::do_apply_event(Relay_log_info const *rli) { /* @@ -5270,6 +5271,9 @@ int Intvar_log_event::do_apply_event(Relay_log_info const *rli) */ const_cast<Relay_log_info*>(rli)->set_flag(Relay_log_info::IN_STMT); + if (rli->deferred_events_collecting) + return rli->deferred_events->add(this); + switch (type) { case LAST_INSERT_ID_EVENT: thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 1; @@ -5375,6 +5379,9 @@ int Rand_log_event::do_apply_event(Relay_log_info const *rli) */ const_cast<Relay_log_info*>(rli)->set_flag(Relay_log_info::IN_STMT); + if (rli->deferred_events_collecting) + return rli->deferred_events->add(this); + thd->rand.seed1= (ulong) seed1; thd->rand.seed2= (ulong) seed2; return 0; @@ -5401,6 +5408,29 @@ Rand_log_event::do_shall_skip(Relay_log_info *rli) return continue_group(rli); } +/** + Exec deferred Int-, Rand- and User- var events prefixing + a Query-log-event event. + + @param thd THD handle + + @return false on success, true if a failure in an event applying occurred.
+*/ +bool slave_execute_deferred_events(THD *thd) +{ + bool res= false; + Relay_log_info *rli= thd->rli_slave; + + DBUG_ASSERT(rli && (!rli->deferred_events_collecting || rli->deferred_events)); + + if (!rli->deferred_events_collecting || rli->deferred_events->is_empty()) + return res; + + res= rli->deferred_events->execute(rli); + + return res; +} + #endif /* !MYSQL_CLIENT */ @@ -5785,6 +5815,10 @@ int User_var_log_event::do_apply_event(Relay_log_info const *rli) { Item *it= 0; CHARSET_INFO *charset; + + if (rli->deferred_events_collecting) + return rli->deferred_events->add(this); + if (!(charset= get_charset(charset_number, MYF(MY_WME)))) return 1; LEX_STRING user_var_name; diff --git a/sql/log_event.h b/sql/log_event.h index 7c707c29278..4c80148458c 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -3961,6 +3961,16 @@ static inline bool copy_event_cache_to_file_and_reinit(IO_CACHE *cache, reinit_io_cache(cache, WRITE_CACHE, 0, FALSE, TRUE); } +#ifndef MYSQL_CLIENT +/** + The function is called by slave applier in case there are + active table filtering rules to force gathering events associated + with Query-log-event into an array to execute + them once the fate of the Query is determined for execution. 
+*/ +bool slave_execute_deferred_events(THD *thd); +#endif + /** @} (end of group Replication) */ diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 6fe4cbd4ea6..904b5533f3b 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -45,7 +45,9 @@ Relay_log_info::Relay_log_info() inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), tables_to_lock(0), tables_to_lock_count(0), - last_event_start_time(0), m_flags(0) + last_event_start_time(0), + deferred_events(NULL), + m_flags(0) { DBUG_ENTER("Relay_log_info::Relay_log_info"); diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index f49be72db46..959105b16e0 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -369,6 +369,41 @@ public: */ time_t last_event_start_time; + /* + A container to hold Intvar-, Rand-, Uservar- log-events in case + the slave is configured with table filtering rules. + The withheld events are executed when their parent Query destiny is + determined for execution as well. + */ + Deferred_log_events *deferred_events; + + /* + State of the container: true stands for IRU events gathering, + false does for execution, either deferred or direct. + */ + bool deferred_events_collecting; + + /* + Returns true if the argument event resides in the container; + more specifically, the checking is done against the last added event. + */ + bool is_deferred_event(Log_event * ev) + { + return deferred_events_collecting ? deferred_events->is_last(ev) : false; + }; + /* The general cleanup that slave applier may need at the end of query. */ + inline void cleanup_after_query() + { + if (deferred_events) + deferred_events->rewind(); + }; + /* The general cleanup that slave applier may need at the end of session. */ + void cleanup_after_session() + { + if (deferred_events) + delete deferred_events; + }; + /** Helper function to do after statement completion.
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc index 46c93f7cd20..5f2b5cbf86b 100644 --- a/sql/rpl_utility.cc +++ b/sql/rpl_utility.cc @@ -226,3 +226,64 @@ table_def::compatible_with(Relay_log_info const *rli_arg, TABLE *table) return error; } + +#ifndef MYSQL_CLIENT +Deferred_log_events::Deferred_log_events(Relay_log_info *rli) : last_added(NULL) +{ + my_init_dynamic_array(&array, sizeof(Log_event *), 32, 16); +} + +Deferred_log_events::~Deferred_log_events() +{ + delete_dynamic(&array); +} + +int Deferred_log_events::add(Log_event *ev) +{ + last_added= ev; + insert_dynamic(&array, (uchar*) &ev); + return 0; +} + +bool Deferred_log_events::is_empty() +{ + return array.elements == 0; +} + +bool Deferred_log_events::execute(Relay_log_info *rli) +{ + bool res= false; + + DBUG_ASSERT(rli->deferred_events_collecting); + + rli->deferred_events_collecting= false; + for (uint i= 0; !res && i < array.elements; i++) + { + Log_event *ev= (* (Log_event **) + dynamic_array_ptr(&array, i)); + res= ev->apply_event(rli); + } + rli->deferred_events_collecting= true; + return res; +} + +void Deferred_log_events::rewind() +{ + /* + Reset preceding Query log event events whose execution was + deferred because of slave side filtering.
+ */ + if (!is_empty()) + { + for (uint i= 0; i < array.elements; i++) + { + Log_event *ev= *(Log_event **) dynamic_array_ptr(&array, i); + delete ev; + } + if (array.elements > array.max_element) + freeze_size(&array); + reset_dynamic(&array); + } +} + +#endif diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h index 4036a41bc60..81a10cca814 100644 --- a/sql/rpl_utility.h +++ b/sql/rpl_utility.h @@ -290,6 +290,24 @@ namespace { }; } + +class Deferred_log_events +{ +private: + DYNAMIC_ARRAY array; + Log_event *last_added; + +public: + Deferred_log_events(Relay_log_info *rli); + ~Deferred_log_events(); + /* queue for execution at Query-log-event time prior to the Query */; + int add(Log_event *ev); + bool is_empty(); + bool execute(Relay_log_info *rli); + void rewind(); + bool is_last(Log_event *ev) { return ev == last_added; }; +}; + #endif // NB. number of printed bit values is limited to sizeof(buf) - 1 diff --git a/sql/slave.cc b/sql/slave.cc index be109a385da..0910d196583 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2360,7 +2360,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) used to read info about the relay log's format; it will be deleted when the SQL thread does not need it, i.e. when this thread terminates.
*/ - if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT && + !rli->is_deferred_event(ev)) { DBUG_PRINT("info", ("Deleting the event after it has been executed")); delete ev; @@ -2990,6 +2991,12 @@ pthread_handler_t handle_slave_sql(void *arg) goto err; } thd->init_for_queries(); + thd->rli_slave= rli; + if ((rli->deferred_events_collecting= rpl_filter->is_on())) + { + rli->deferred_events= new Deferred_log_events(rli); + } + thd->temporary_tables = rli->save_temporary_tables; // restore temp tables set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables pthread_mutex_lock(&LOCK_thread_count); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index ace78947054..ca24830db63 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -116,6 +116,8 @@ static void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, bool send_refresh); static bool has_write_table_with_auto_increment(TABLE_LIST *tables); +static bool +has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables); extern "C" uchar *table_cache_key(const uchar *record, size_t *length, @@ -5471,6 +5473,12 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) *(ptr++)= table->table; } + if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables) + { + if (has_write_table_auto_increment_not_first_in_pk(tables)) + thd->lex->set_stmt_unsafe(); + } + /* We have to emulate LOCK TABLES if we are statement needs prelocking. */ if (thd->lex->requires_prelocking()) { @@ -9065,6 +9073,32 @@ has_write_table_with_auto_increment(TABLE_LIST *tables) return 0; } +/* + Tells if there is a table whose auto_increment column is a part + of a compound primary key while is not the first column in + the table definition. + + @param tables Table list + + @return true if the table exists, fais if does not. 
+*/ + +static bool +has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables) +{ + for (TABLE_LIST *table= tables; table; table= table->next_global) + { + /* we must do preliminary checks as table->table may be NULL */ + if (!table->placeholder() && + table->table->found_next_number_field && + (table->lock_type >= TL_WRITE_ALLOW_WRITE) + && table->table->s->next_number_keypart != 0) + return 1; + } + + return 0; +} + /* Open and lock system tables for read. diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 32e2e90acd7..2f3e8153cbd 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -601,7 +601,7 @@ Diagnostics_area::disable_status() THD::THD() :Statement(&main_lex, &main_mem_root, CONVENTIONAL_EXECUTION, /* statement id */ 0), - Open_tables_state(refresh_version), rli_fake(0), + Open_tables_state(refresh_version), rli_fake(NULL), rli_slave(NULL), lock_id(&main_lock_id), user_time(0), in_sub_stmt(0), sql_log_bin_toplevel(false), @@ -1035,6 +1035,8 @@ THD::~THD() delete rli_fake; rli_fake= NULL; } + if (rli_slave) + rli_slave->cleanup_after_session(); #endif free_root(&main_mem_root, MYF(0)); @@ -1267,6 +1269,10 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= FALSE; +#ifndef EMBEDDED_LIBRARY + if (rli_slave) + rli_slave->cleanup_after_query(); +#endif } diff --git a/sql/sql_class.h b/sql/sql_class.h index 3c1b2c1330f..31d2c664aae 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1291,6 +1291,8 @@ class THD :public Statement, public: /* Used to execute base64 coded binlog events in MySQL server */ Relay_log_info* rli_fake; + /* Slave applier execution context */ + Relay_log_info* rli_slave; /* Constant for THD::where initialization in the beginning of every query. 
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 14b778328d2..729a963eef1 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2248,6 +2248,11 @@ mysql_execute_command(THD *thd) } DBUG_RETURN(0); } + /* + Execute deferred events first + */ + if (slave_execute_deferred_events(thd)) + DBUG_RETURN(-1); } else { From cdaae1692b1e74c0fdeaaece82e3350a30e3bbd2 Mon Sep 17 00:00:00 2001 From: Nuno Carvalho Date: Fri, 20 Apr 2012 22:25:59 +0100 Subject: [PATCH 12/39] BUG#13979418: SHOW BINLOG EVENTS MAY CRASH THE SERVER The function mysql_show_binlog_events has a local stack variable 'LOG_INFO linfo;', which is assigned to thd->current_linfo, however this variable goes out of scope and is destroyed before clean thd->current_linfo. The problem is solved by moving 'LOG_INFO linfo;' to function scope. --- ...allel_show_binlog_events_purge_logs.result | 13 +++++++ ...arallel_show_binlog_events_purge_logs.test | 35 +++++++++++++++++++ sql/log.h | 5 +++ sql/sql_repl.cc | 5 ++- 4 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_show_binlog_events_purge_logs.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_show_binlog_events_purge_logs.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_show_binlog_events_purge_logs.result b/mysql-test/suite/rpl/r/rpl_parallel_show_binlog_events_purge_logs.result new file mode 100644 index 00000000000..b69deb17c4c --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_show_binlog_events_purge_logs.result @@ -0,0 +1,13 @@ +include/master-slave.inc +[connection master] +[connection slave] +SET DEBUG_SYNC= 'after_show_binlog_events SIGNAL on_show_binlog_events WAIT_FOR end'; +SHOW BINLOG EVENTS; +[connection slave1] +SET DEBUG_SYNC= 'now WAIT_FOR on_show_binlog_events'; +FLUSH LOGS; +SET DEBUG_SYNC= 'now SIGNAL end'; +SET DEBUG_SYNC= 'RESET'; +[connection slave] +SET DEBUG_SYNC= 'RESET'; +include/rpl_end.inc diff --git 
a/mysql-test/suite/rpl/t/rpl_parallel_show_binlog_events_purge_logs.test b/mysql-test/suite/rpl/t/rpl_parallel_show_binlog_events_purge_logs.test new file mode 100644 index 00000000000..16d986268c9 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_show_binlog_events_purge_logs.test @@ -0,0 +1,35 @@ +# BUG#13979418: SHOW BINLOG EVENTS MAY CRASH THE SERVER +# +# The function mysql_show_binlog_events has a local stack variable +# 'LOG_INFO linfo;', which is assigned to thd->current_linfo, however +# this variable goes out of scope and is destroyed before clean +# thd->current_linfo. +# +# This test case runs SHOW BINLOG EVENTS and FLUSH LOGS to make sure +# that with the fix local variable linfo is valid along all +# mysql_show_binlog_events function scope. +# +--source include/have_debug_sync.inc +--source include/master-slave.inc + +--echo [connection slave] +--connection slave +SET DEBUG_SYNC= 'after_show_binlog_events SIGNAL on_show_binlog_events WAIT_FOR end'; +--send SHOW BINLOG EVENTS + +--connection slave1 +--echo [connection slave1] +SET DEBUG_SYNC= 'now WAIT_FOR on_show_binlog_events'; +FLUSH LOGS; +SET DEBUG_SYNC= 'now SIGNAL end'; +SET DEBUG_SYNC= 'RESET'; + +--echo [connection slave] +--connection slave +--disable_result_log +--reap +--enable_result_log +SET DEBUG_SYNC= 'RESET'; + +--connection master +--source include/rpl_end.inc diff --git a/sql/log.h b/sql/log.h index 81342f6e696..9ed5db04e87 100644 --- a/sql/log.h +++ b/sql/log.h @@ -130,6 +130,11 @@ extern TC_LOG_DUMMY tc_log_dummy; class Relay_log_info; +/* + Note that we destroy the lock mutex in the desctructor here. 
+ This means that object instances cannot be destroyed/go out of scope, + until we have reset thd->current_linfo to NULL; + */ typedef struct st_log_info { char log_file_name[FN_REFLEN]; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index a2c388ba388..b985805a827 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1454,6 +1454,8 @@ bool mysql_show_binlog_events(THD* thd) IO_CACHE log; File file = -1; int old_max_allowed_packet= thd->variables.max_allowed_packet; + LOG_INFO linfo; + DBUG_ENTER("mysql_show_binlog_events"); Log_event::init_show_field_list(&field_list); @@ -1480,7 +1482,6 @@ bool mysql_show_binlog_events(THD* thd) char search_file_name[FN_REFLEN], *name; const char *log_file_name = lex_mi->log_file_name; pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); - LOG_INFO linfo; Log_event* ev; unit->set_limit(thd->lex->current_select); @@ -1572,6 +1573,8 @@ bool mysql_show_binlog_events(THD* thd) pthread_mutex_unlock(log_lock); } + // Check that linfo is still in function scope. + DEBUG_SYNC(thd, "after_show_binlog_events"); ret= FALSE; From dcb5071b1906d76b4c61d3125ddc7368f3ee8c4f Mon Sep 17 00:00:00 2001 From: Mayank Prasad Date: Sat, 21 Apr 2012 05:23:09 +0530 Subject: [PATCH 13/39] BUG#12427262 : 60961: SHOW TABLES VERY SLOW WHEN NOT IN SYSTEM DISK CACHE Details: - test case bug12427262.test was failing on windows because on windows '/' was not recognized. And this was used in LIKE clause of the query being run in this test case. Fix: - Windows needs '\\\\' for path separator in mysql. I was not sure how to keep a single query with two different syntax based on platform. So modifying query to make sure it runs correctly on both platforms.
--- mysql-test/r/bug12427262.result | 19 +++++++++++-------- mysql-test/t/bug12427262.test | 15 +++++++++------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/mysql-test/r/bug12427262.result b/mysql-test/r/bug12427262.result index f0f7ae46ba2..c541ba0f664 100644 --- a/mysql-test/r/bug12427262.result +++ b/mysql-test/r/bug12427262.result @@ -1,8 +1,8 @@ # # Bug#12427262 : 60961: SHOW TABLES VERY SLOW WHEN NOT IN SYSTEM DISK CACHE. # -create database show_table_db; -use show_table_db; +create database show_table_lw_db; +use show_table_lw_db; create table t1 (c1 int); create table t2 (c1 int); create table t3 (c1 int); @@ -14,9 +14,10 @@ create table t8 (c1 int); create table t9 (c1 int); create table t10 (c1 int); select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_before; +like "%show_table_lw_db%" AND FILE_NAME like "%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_before; show tables; -Tables_in_show_table_db +Tables_in_show_table_lw_db t1 t10 t2 @@ -28,12 +29,13 @@ t7 t8 t9 select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_after; +like "%show_table_lw_db%" AND FILE_NAME like "%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_after; select @count_read_after-@count_read_before; @count_read_after-@count_read_before 0.000000000000000000000000000000 show full tables; -Tables_in_show_table_db Table_type +Tables_in_show_table_lw_db Table_type t1 BASE TABLE t10 BASE TABLE t2 BASE TABLE @@ -45,9 +47,10 @@ t7 BASE TABLE t8 BASE TABLE t9 BASE TABLE select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_after; +like "%show_table_lw_db%" AND FILE_NAME like 
"%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_after; select @count_read_after-@count_read_before; @count_read_after-@count_read_before 10.000000000000000000000000000000 drop table t1; -drop database show_table_db; +drop database show_table_lw_db; diff --git a/mysql-test/t/bug12427262.test b/mysql-test/t/bug12427262.test index b7193dd1125..aca37a651c4 100644 --- a/mysql-test/t/bug12427262.test +++ b/mysql-test/t/bug12427262.test @@ -6,8 +6,8 @@ --source include/have_perfschema.inc --disable_warnings -create database show_table_db; -use show_table_db; +create database show_table_lw_db; +use show_table_lw_db; create table t1 (c1 int); create table t2 (c1 int); create table t3 (c1 int); @@ -22,14 +22,16 @@ create table t10 (c1 int); # Query PS to know initial read count for frm file. select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_before; +like "%show_table_lw_db%" AND FILE_NAME like "%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_before; show tables; # Query PS to know read count for frm file after above query. It should # not be changed as FRM file will not be opened for above query. select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_after; +like "%show_table_lw_db%" AND FILE_NAME like "%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_after; select @count_read_after-@count_read_before; @@ -38,11 +40,12 @@ show full tables; # Query PS to know read count for frm file after above query. COUNT_READ # will be incremented by 1 as FRM file will be opened for above query. 
select Sum(ALL(COUNT_READ)) from performance_schema.file_summary_by_instance where FILE_NAME -like "%show_table_db/%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' into @count_read_after; +like "%show_table_lw_db%" AND FILE_NAME like "%.frm%" AND EVENT_NAME='wait/io/file/sql/FRM' +into @count_read_after; select @count_read_after-@count_read_before; --disable_warnings drop table t1; -drop database show_table_db; +drop database show_table_lw_db; --enable_warnings From 6d74c922af3d37debfe3a42803d5c86b56dd936e Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Sat, 21 Apr 2012 14:19:06 +0300 Subject: [PATCH 14/39] merge bug11754117-45670 fixes from 5.1: fixing result files. --- .../suite/rpl/r/rpl_auto_increment_bug45679.result | 9 +++++++-- .../suite/rpl/r/rpl_filter_tables_not_exist.result | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result index 75f4e661dc1..198ecfb6724 100644 --- a/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result +++ b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result @@ -9,10 +9,10 @@ set @@binlog_format=statement; *** autoincrement field is not the first in PK warning must be there: *** insert into tm set b=null, a=1; Warnings: -Note 1592 Statement may not be safe to log in statement format. +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. INSERT into autoincrement field which is not the first part in the composed primary key is unsafe. show warnings; Level Code Message -Note 1592 Statement may not be safe to log in statement format. +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. INSERT into autoincrement field which is not the first part in the composed primary key is unsafe. 
*** no warning when autoincrement is the first in PK insert into ti set b=null, a=1; show warnings; @@ -26,9 +26,14 @@ end// select multi_part_pk_with_autoinc (3); multi_part_pk_with_autoinc (3) 3 +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it invokes a trigger or a stored function that inserts into an AUTO_INCREMENT column. Inserted values cannot be logged correctly. +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. INSERT into autoincrement field which is not the first part in the composed primary key is unsafe. *** autoincrement field is not the first in PK warning must be there: *** show warnings; Level Code Message +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it invokes a trigger or a stored function that inserts into an AUTO_INCREMENT column. Inserted values cannot be logged correctly. +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. INSERT into autoincrement field which is not the first part in the composed primary key is unsafe. set @@binlog_format=mixed; insert into tm set b=null, a=2; drop table tm, ti; diff --git a/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result b/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result index f0dc97b71f2..025ef690ed7 100644 --- a/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result +++ b/mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result @@ -126,7 +126,7 @@ SET @c=2; SET @@rand_seed1=10000000, @@rand_seed2=1000000; INSERT INTO t5 VALUES (NULL, ROUND(RAND() * 1000), @c); Warnings: -Note 1592 Statement may not be safe to log in statement format. +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. 
Statement is unsafe because it uses a system function that may return a different value on the slave. SELECT b into @b FROM test.t5; UPDATE test.t1 SET a=2; SELECT a AS 'ONE' into @a FROM test.t_slave; From da7436a2a88a94440b746ad6600450569a50923b Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Sat, 21 Apr 2012 15:06:06 +0300 Subject: [PATCH 15/39] merge bug11754117-45670 fixes from 5.1: fixing comments in sql_lex.h. --- sql/sql_lex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 992ed551564..111a22c7ff9 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1330,7 +1330,8 @@ public: BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS, /** - INSERT into auto-inc field which is not the first field in the table. + INSERT into auto-inc field which is not the first part of composed + primary key. */ BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST, From bd0c38064faf52e65ee8d2044f15b6393b44522d Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Mon, 23 Apr 2012 11:51:19 +0300 Subject: [PATCH 16/39] BUG#11754117 rpl_auto_increment_bug45679.test is refined due to not fixed in 5.1 Bug11749859-39934. 
--- mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result | 2 +- mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result index 75f4e661dc1..e2b3206eb02 100644 --- a/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result +++ b/mysql-test/suite/rpl/r/rpl_auto_increment_bug45679.result @@ -26,7 +26,7 @@ end// select multi_part_pk_with_autoinc (3); multi_part_pk_with_autoinc (3) 3 -*** autoincrement field is not the first in PK warning must be there: *** +*** No warnings in 5.1 because of Bug11749859-39934 *** show warnings; Level Code Message set @@binlog_format=mixed; diff --git a/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test b/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test index 6996e1c73c7..0df086ddbfa 100644 --- a/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test +++ b/mysql-test/suite/rpl/t/rpl_auto_increment_bug45679.test @@ -35,7 +35,7 @@ end// delimiter ;// select multi_part_pk_with_autoinc (3); ---echo *** autoincrement field is not the first in PK warning must be there: *** +--echo *** No warnings in 5.1 because of Bug11749859-39934 *** show warnings; set @@binlog_format=mixed; From 2415d955c8fff4a2001f4d1774a75f156b787e7c Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Mon, 23 Apr 2012 06:39:16 -0400 Subject: [PATCH 17/39] Bug#12677594 - 61575: INNODB: WARNING: IO_SETUP() FAILED WITH EAGAIN. rb://1033 approved by: Marko Makela Check return value from os_aio_init() and refuse to start if it fails. 
--- storage/innobase/srv/srv0start.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index 343e41fe376..86669a50895 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -1364,10 +1364,18 @@ innobase_start_or_create_for_mysql(void) } # endif /* __WIN__ */ - os_aio_init(io_limit, - srv_n_read_io_threads, - srv_n_write_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); + if (!os_aio_init(io_limit, + srv_n_read_io_threads, + srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS)) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Fatal error: cannot initialize AIO" + " sub-system\n"); + + return(DB_ERROR); + } fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files); From 678f221cb9ac3ba18b70568077d8979adb1ac44c Mon Sep 17 00:00:00 2001 From: Kent Boortz Date: Mon, 23 Apr 2012 12:52:14 +0200 Subject: [PATCH 18/39] Allow Windows absolute paths in N:\ format for the --vardir option --- mysql-test/mysql-test-run.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 43734d04286..927a2ebfa91 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -1442,7 +1442,7 @@ sub command_line_setup { # We make the path absolute, as the server will do a chdir() before usage unless ( $opt_vardir =~ m,^/, or - (IS_WINDOWS and $opt_vardir =~ m,^[a-z]:/,i) ) + (IS_WINDOWS and $opt_vardir =~ m,^[a-z]:[/\\],i) ) { # Make absolute path, relative test dir $opt_vardir= "$glob_mysql_test_dir/$opt_vardir"; From e881e03e01e1c27fa4d7715aec750d34af56aadd Mon Sep 17 00:00:00 2001 From: Georgi Kodinov Date: Mon, 23 Apr 2012 16:26:22 +0300 Subject: [PATCH 19/39] Bug #59148 - made tests experimental Fixed a cmake 2.8.8 compilation problem. 
--- configure.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.cmake b/configure.cmake index c3cc787ebc8..b8fa70db203 100644 --- a/configure.cmake +++ b/configure.cmake @@ -149,7 +149,9 @@ IF(UNIX) SET(CMAKE_REQUIRED_LIBRARIES ${LIBM} ${LIBNSL} ${LIBBIND} ${LIBCRYPT} ${LIBSOCKET} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} ${LIBRT}) - LIST(REMOVE_DUPLICATES CMAKE_REQUIRED_LIBRARIES) + IF(NOT ${CMAKE_REQUIRED_LIBRARIES} MATCHES '') + LIST(REMOVE_DUPLICATES ${CMAKE_REQUIRED_LIBRARIES}) + ENDIF() LINK_LIBRARIES(${CMAKE_THREAD_LIBS_INIT}) OPTION(WITH_LIBWRAP "Compile with tcp wrappers support" OFF) From b872ce0312022b5ac86a368305c34ce9fda7d720 Mon Sep 17 00:00:00 2001 From: Georgi Kodinov Date: Mon, 23 Apr 2012 17:18:55 +0300 Subject: [PATCH 20/39] Fixed a cmake compile problem because of the 2.8.8 fix. --- configure.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configure.cmake b/configure.cmake index b8fa70db203..5b1afd66439 100644 --- a/configure.cmake +++ b/configure.cmake @@ -149,8 +149,9 @@ IF(UNIX) SET(CMAKE_REQUIRED_LIBRARIES ${LIBM} ${LIBNSL} ${LIBBIND} ${LIBCRYPT} ${LIBSOCKET} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} ${LIBRT}) - IF(NOT ${CMAKE_REQUIRED_LIBRARIES} MATCHES '') - LIST(REMOVE_DUPLICATES ${CMAKE_REQUIRED_LIBRARIES}) + LIST(LENGTH CMAKE_REQUIRED_LIBRARIES required_libs_length) + IF(${required_libs_length} GREATER 0) + LIST(REMOVE_DUPLICATES CMAKE_REQUIRED_LIBRARIES) ENDIF() LINK_LIBRARIES(${CMAKE_THREAD_LIBS_INIT}) OPTION(WITH_LIBWRAP "Compile with tcp wrappers support" OFF) From 08ead005b11a2806e45f395dd4b36ecd0f471b7c Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Mon, 23 Apr 2012 22:15:29 -0400 Subject: [PATCH 21/39] Bug#13990648: 65061: LRU FLUSH RATE CALCULATION IS BASED ON INVALID VALUES rb://1043 approved by: Sunny Bains Two internal counters were incremented twice for a single operation. 
The counters are: srv_buf_pool_flushed buf_lru_flush_page_count --- storage/innobase/buf/buf0flu.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c index 6e8e8fdda5a..7cd09d6675e 100644 --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -1750,8 +1750,6 @@ buf_flush_batch( } #endif /* UNIV_DEBUG */ - srv_buf_pool_flushed += count; - return(count); } @@ -1778,13 +1776,6 @@ buf_flush_common( #endif /* UNIV_DEBUG */ srv_buf_pool_flushed += page_count; - - if (flush_type == BUF_FLUSH_LRU) { - /* We keep track of all flushes happening as part of LRU - flush. When estimating the desired rate at which flush_list - should be flushed we factor in this value. */ - buf_lru_flush_page_count += page_count; - } } /******************************************************************//** From 3f98e95354b6d1c55a824bb1b314e3bb8c465ef0 Mon Sep 17 00:00:00 2001 From: Manish Kumar Date: Thu, 26 Apr 2012 19:34:03 +0530 Subject: [PATCH 22/39] BUG#13812374 - RPL.RPL_REPORT_PORT FAILS OCCASIONALLY ON PB2 Problem - The failure on PB2 is possibly due to the port number being still in use even after the server restarts which is not reflected in the server restart. Fix - The problem is fixed by starting the servers forcefully using the option file and also the parameters for the server restart are passed correctly. mysql-test/suite/rpl/t/rpl_report_port-master.opt: Option file for the master. 
--- mysql-test/suite/rpl/r/rpl_report_port.result | 4 ++-- mysql-test/suite/rpl/t/rpl_report_port-master.opt | 1 + mysql-test/suite/rpl/t/rpl_report_port.test | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 mysql-test/suite/rpl/t/rpl_report_port-master.opt diff --git a/mysql-test/suite/rpl/r/rpl_report_port.result b/mysql-test/suite/rpl/r/rpl_report_port.result index d9887f41ff9..6a9ac341780 100644 --- a/mysql-test/suite/rpl/r/rpl_report_port.result +++ b/mysql-test/suite/rpl/r/rpl_report_port.result @@ -3,8 +3,8 @@ include/master-slave.inc include/rpl_restart_server.inc [server_number=2 parameters: --report-port=9000] include/start_slave.inc [Slave restarted with the report-port set to some value] -include/assert.inc [The value shown for the slave's port number is 9000 which is the value set for report-port.] -include/rpl_restart_server.inc [server_number=2 parameters: --report-port=] +include/assert.inc [The value shown for the slave's port number is user specified port number which is the value set for report-port.] +include/rpl_restart_server.inc [server_number=2] include/start_slave.inc [Slave restarted with the report-port set to the value of slave's port number] include/assert.inc [The default value shown for the slave's port number is the actual port number of the slave.] diff --git a/mysql-test/suite/rpl/t/rpl_report_port-master.opt b/mysql-test/suite/rpl/t/rpl_report_port-master.opt new file mode 100644 index 00000000000..cef79bc8585 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_report_port-master.opt @@ -0,0 +1 @@ +--force-restart diff --git a/mysql-test/suite/rpl/t/rpl_report_port.test b/mysql-test/suite/rpl/t/rpl_report_port.test index 379f9aed946..12c34ce01b1 100644 --- a/mysql-test/suite/rpl/t/rpl_report_port.test +++ b/mysql-test/suite/rpl/t/rpl_report_port.test @@ -38,21 +38,23 @@ connection master; # 9000 is the value of the port we should get. 
--let $report_port= query_get_value(SHOW SLAVE HOSTS, Port, 1) ---let assert_text= The value shown for the slave's port number is 9000 which is the value set for report-port. +--let assert_text= The value shown for the slave's port number is user specified port number which is the value set for report-port. --let assert_cond= $report_port = "9000" --source include/assert.inc - # Start the server with the report-port being passed with no value. So on SHOW SLAVE HOSTS # on the master the value of slave's port should be the actual value of the slave port. +connection master; --let $rpl_server_number= 2 ---let $rpl_server_parameters= --report-port= +--let $rpl_server_parameters= --source include/rpl_restart_server.inc connection slave; --source include/start_slave.inc +connection master; +sync_slave_with_master; --echo [Slave restarted with the report-port set to the value of slave's port number] connection master; From 1a2cf649dc411b47122419cb9e8c8ef1a26ae91a Mon Sep 17 00:00:00 2001 From: irana Date: Thu, 26 Apr 2012 08:17:14 -0700 Subject: [PATCH 23/39] InnoDB: Adjust error message when a dropped tablespace is accessed. --- storage/innodb_plugin/fil/fil0fil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c index eebfbd9ccad..6f2ab938042 100644 --- a/storage/innodb_plugin/fil/fil0fil.c +++ b/storage/innodb_plugin/fil/fil0fil.c @@ -1853,7 +1853,7 @@ fil_inc_pending_ops( if (space == NULL) { fprintf(stderr, - "InnoDB: Error: trying to do ibuf merge to a" + "InnoDB: Error: trying to do an operation on a" " dropped tablespace %lu\n", (ulong) id); } From 08e7444e549722f61dd90fdf23ce05632c72044e Mon Sep 17 00:00:00 2001 From: Yasufumi Kinoshita Date: Fri, 27 Apr 2012 19:38:13 +0900 Subject: [PATCH 24/39] Bug#11758510 (#50723): INNODB CHECK TABLE FATAL SEMAPHORE WAIT TIMEOUT POSSIBLY TOO SHORT FOR BI Fixed not to check timeout during the check table. 
--- storage/innobase/dict/dict0load.c | 4 ++-- storage/innobase/include/srv0srv.h | 1 + storage/innobase/row/row0mysql.c | 4 ++-- storage/innobase/sync/sync0arr.c | 5 +++++ storage/innodb_plugin/dict/dict0load.c | 4 ++-- storage/innodb_plugin/handler/ha_innodb.cc | 4 ++-- storage/innodb_plugin/include/srv0srv.h | 1 + storage/innodb_plugin/sync/sync0arr.c | 5 +++++ 8 files changed, 20 insertions(+), 8 deletions(-) diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 7e820cfb08d..3c3c0d10c7c 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -150,7 +150,7 @@ dict_print(void) monitor printout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); mutex_enter(&(dict_sys->mutex)); @@ -178,7 +178,7 @@ loop: /* Restore the fatal semaphore wait timeout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); return; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 811074b2be8..aa6c88e0538 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -173,6 +173,7 @@ extern ibool srv_print_latch_waits; extern ulint srv_activity_count; extern ulint srv_fatal_semaphore_wait_threshold; +#define SRV_SEMAPHORE_WAIT_EXTENSION 7200 extern ulint srv_dml_needed_delay; extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index 6148b01af9d..16fd2b3b482 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -4165,7 +4165,7 @@ row_check_table_for_mysql( /* Enlarge the fatal lock wait timeout during CHECK TABLE. 
*/ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); index = dict_table_get_first_index(table); @@ -4223,7 +4223,7 @@ row_check_table_for_mysql( /* Restore the fatal lock wait timeout after CHECK TABLE. */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); prebuilt->trx->op_info = ""; diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index 93a7398f252..60a96fe4388 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -931,6 +931,11 @@ sync_array_print_long_waits( ibool fatal = FALSE; double longest_diff = 0; + /* For huge tables, skip the check during CHECK TABLE etc... */ + if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) { + return(FALSE); + } + for (i = 0; i < sync_primary_wait_array->n_cells; i++) { double diff; diff --git a/storage/innodb_plugin/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c index 7a0b6edcb08..7231fadabe1 100644 --- a/storage/innodb_plugin/dict/dict0load.c +++ b/storage/innodb_plugin/dict/dict0load.c @@ -164,7 +164,7 @@ dict_print(void) monitor printout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); mutex_enter(&(dict_sys->mutex)); @@ -192,7 +192,7 @@ loop: /* Restore the fatal semaphore wait timeout */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); return; diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index f16b9fabd20..30448bccf7b 100644 --- 
a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -8148,7 +8148,7 @@ ha_innobase::check( /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); for (index = dict_table_get_first_index(prebuilt->table); @@ -8244,7 +8244,7 @@ ha_innobase::check( /* Restore the fatal lock wait timeout after CHECK TABLE. */ mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; mutex_exit(&kernel_mutex); prebuilt->trx->op_info = ""; diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 7c63a5f0d45..1a9f54882c5 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -244,6 +244,7 @@ extern ibool srv_print_latch_waits; extern ulint srv_activity_count; extern ulint srv_fatal_semaphore_wait_threshold; +#define SRV_SEMAPHORE_WAIT_EXTENSION 7200 extern ulint srv_dml_needed_delay; extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, diff --git a/storage/innodb_plugin/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c index 13970023573..90ea50b641b 100644 --- a/storage/innodb_plugin/sync/sync0arr.c +++ b/storage/innodb_plugin/sync/sync0arr.c @@ -927,6 +927,11 @@ sync_array_print_long_waits( ibool fatal = FALSE; double longest_diff = 0; + /* For huge tables, skip the check during CHECK TABLE etc... 
*/ + if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) { + return(FALSE); + } + for (i = 0; i < sync_primary_wait_array->n_cells; i++) { double diff; From 6fa011056abf534bf3d544cdef14f3a539777b6d Mon Sep 17 00:00:00 2001 From: Alexander Nozdrin Date: Fri, 27 Apr 2012 18:42:27 +0400 Subject: [PATCH 25/39] Follow-up for Bug#12762885 - 61713: MYSQL WILL NOT BIND TO "LOCALHOST" IF LOCALHOST IS BOTH IPV4/IPV6 ENABLED. The original patch removed default value of the bind-address option. So, the default value became NULL. By coincidence NULL resolves to 0.0.0.0 and ::, and since the server chooses first IPv4-address, 0.0.0.0 is chosen. So, there was no change in the behaviour. This patch restores default value of the bind-address option to "0.0.0.0". --- sql/mysqld.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 547378d4661..e90bd8dad33 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1841,6 +1841,9 @@ static void network_init(void) struct addrinfo *ai, *a; struct addrinfo hints; + if (!my_bind_addr_str) + my_bind_addr_str= (char *) "0.0.0.0"; + sql_print_information("Server hostname (bind-address): '%s'; port: %d", my_bind_addr_str, mysqld_port); From 25cdec81e082049a668552f631fa32a838d915e3 Mon Sep 17 00:00:00 2001 From: Alexander Nozdrin Date: Fri, 27 Apr 2012 19:14:28 +0400 Subject: [PATCH 26/39] Proper follow-up for Bug#12762885 - 61713: MYSQL WILL NOT BIND TO "LOCALHOST" IF LOCALHOST IS BOTH IPV4/IPV6 ENABLED. The original patch removed default value of the bind-address option. So, the default value became NULL. By coincidence NULL resolves to 0.0.0.0 and ::, and since the server chooses first IPv4-address, 0.0.0.0 is chosen. So, there was no change in the behaviour. This patch restores default value of the bind-address option to "0.0.0.0". 
--- mysql-test/r/mysqld--help-notwin.result | 2 +- mysql-test/r/mysqld--help-win.result | 2 +- sql/mysqld.cc | 8 +------- sql/mysqld.h | 1 + sql/sys_vars.cc | 5 +++++ 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index 36649cf761d..ac8edfc5083 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -747,7 +747,7 @@ autocommit TRUE automatic-sp-privileges TRUE back-log 50 big-tables FALSE -bind-address (No default value) +bind-address 0.0.0.0 binlog-cache-size 32768 binlog-direct-non-transactional-updates FALSE binlog-format STATEMENT diff --git a/mysql-test/r/mysqld--help-win.result b/mysql-test/r/mysqld--help-win.result index db7dd264b76..d47b6f0c1a5 100644 --- a/mysql-test/r/mysqld--help-win.result +++ b/mysql-test/r/mysqld--help-win.result @@ -755,7 +755,7 @@ autocommit TRUE automatic-sp-privileges TRUE back-log 50 big-tables FALSE -bind-address (No default value) +bind-address 0.0.0.0 binlog-cache-size 32768 binlog-direct-non-transactional-updates FALSE binlog-format STATEMENT diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e90bd8dad33..936167280e0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -341,7 +341,7 @@ static char *default_character_set_name; static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; -static char *my_bind_addr_str; +char *my_bind_addr_str; static char *default_collation_name; char *default_storage_engine; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; @@ -1841,9 +1841,6 @@ static void network_init(void) struct addrinfo *ai, *a; struct addrinfo hints; - if (!my_bind_addr_str) - my_bind_addr_str= (char *) "0.0.0.0"; - sql_print_information("Server hostname (bind-address): '%s'; port: %d", my_bind_addr_str, mysqld_port); @@ -5713,9 +5710,6 @@ struct my_option my_long_options[]= {"autocommit", 0, "Set default value for 
autocommit (0 or 1)", &opt_autocommit, &opt_autocommit, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, NULL}, - {"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.", - &my_bind_addr_str, &my_bind_addr_str, 0, GET_STR, - REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"binlog-do-db", OPT_BINLOG_DO_DB, "Tells the master it should log updates for the specified database, " "and exclude all others not explicitly mentioned.", diff --git a/sql/mysqld.h b/sql/mysqld.h index 2604e889ebd..7fef4c6af82 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -146,6 +146,7 @@ extern char *opt_backup_history_logname, *opt_backup_progress_logname, extern const char *log_output_str; extern const char *log_backup_output_str; extern char *mysql_home_ptr, *pidfile_name_ptr; +extern char *my_bind_addr_str; extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char default_logfile_name[FN_REFLEN]; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index f15664bca10..9c7421c46f2 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -308,6 +308,11 @@ static Sys_var_charptr Sys_basedir( READ_ONLY GLOBAL_VAR(mysql_home_ptr), CMD_LINE(REQUIRED_ARG, 'b'), IN_FS_CHARSET, DEFAULT(0)); +static Sys_var_charptr Sys_my_bind_addr( + "bind_address", "IP address to bind to.", + READ_ONLY GLOBAL_VAR(my_bind_addr_str), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT("0.0.0.0")); + static Sys_var_ulong Sys_binlog_cache_size( "binlog_cache_size", "The size of the transactional cache for " "updates to transactional engines for the binary log. " From 476762bd7b3bbe6c1f89858174c0dde5e44b361b Mon Sep 17 00:00:00 2001 From: Alexander Nozdrin Date: Fri, 27 Apr 2012 21:07:53 +0400 Subject: [PATCH 27/39] Revert two follow-ups for Bug#12762885: - alexander.nozdrin@oracle.com-20120427151428-7llk1mlwx8xmbx0t - alexander.nozdrin@oracle.com-20120427144227-kltwiuu8snds4j3l. 
--- mysql-test/r/mysqld--help-notwin.result | 2 +- mysql-test/r/mysqld--help-win.result | 2 +- sql/mysqld.cc | 5 ++++- sql/mysqld.h | 1 - sql/sys_vars.cc | 5 ----- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index ac8edfc5083..36649cf761d 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -747,7 +747,7 @@ autocommit TRUE automatic-sp-privileges TRUE back-log 50 big-tables FALSE -bind-address 0.0.0.0 +bind-address (No default value) binlog-cache-size 32768 binlog-direct-non-transactional-updates FALSE binlog-format STATEMENT diff --git a/mysql-test/r/mysqld--help-win.result b/mysql-test/r/mysqld--help-win.result index d47b6f0c1a5..db7dd264b76 100644 --- a/mysql-test/r/mysqld--help-win.result +++ b/mysql-test/r/mysqld--help-win.result @@ -755,7 +755,7 @@ autocommit TRUE automatic-sp-privileges TRUE back-log 50 big-tables FALSE -bind-address 0.0.0.0 +bind-address (No default value) binlog-cache-size 32768 binlog-direct-non-transactional-updates FALSE binlog-format STATEMENT diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 936167280e0..547378d4661 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -341,7 +341,7 @@ static char *default_character_set_name; static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; -char *my_bind_addr_str; +static char *my_bind_addr_str; static char *default_collation_name; char *default_storage_engine; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; @@ -5710,6 +5710,9 @@ struct my_option my_long_options[]= {"autocommit", 0, "Set default value for autocommit (0 or 1)", &opt_autocommit, &opt_autocommit, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, NULL}, + {"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.", + &my_bind_addr_str, &my_bind_addr_str, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"binlog-do-db", 
OPT_BINLOG_DO_DB, "Tells the master it should log updates for the specified database, " "and exclude all others not explicitly mentioned.", diff --git a/sql/mysqld.h b/sql/mysqld.h index 7fef4c6af82..2604e889ebd 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -146,7 +146,6 @@ extern char *opt_backup_history_logname, *opt_backup_progress_logname, extern const char *log_output_str; extern const char *log_backup_output_str; extern char *mysql_home_ptr, *pidfile_name_ptr; -extern char *my_bind_addr_str; extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char default_logfile_name[FN_REFLEN]; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 9c7421c46f2..f15664bca10 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -308,11 +308,6 @@ static Sys_var_charptr Sys_basedir( READ_ONLY GLOBAL_VAR(mysql_home_ptr), CMD_LINE(REQUIRED_ARG, 'b'), IN_FS_CHARSET, DEFAULT(0)); -static Sys_var_charptr Sys_my_bind_addr( - "bind_address", "IP address to bind to.", - READ_ONLY GLOBAL_VAR(my_bind_addr_str), CMD_LINE(REQUIRED_ARG), - IN_FS_CHARSET, DEFAULT("0.0.0.0")); - static Sys_var_ulong Sys_binlog_cache_size( "binlog_cache_size", "The size of the transactional cache for " "updates to transactional engines for the binary log. " From 95205bbaffed8adb80185af5fa24aef697b6c9d4 Mon Sep 17 00:00:00 2001 From: Alexander Nozdrin Date: Fri, 27 Apr 2012 21:14:35 +0400 Subject: [PATCH 28/39] Third attempt to do a follow-up for Bug#12762885 - 61713: MYSQL WILL NOT BIND TO "LOCALHOST" IF LOCALHOST IS BOTH IPV4/IPV6 ENABLED. Previous commit comments were wrong. The default value has always been NULL. The original patch for Bug#12762885 just makes it visible in the logs. This patch uses "0.0.0.0" string if bind-address is not set. 
--- sql/mysqld.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 547378d4661..63656460f42 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1840,9 +1840,13 @@ static void network_init(void) { struct addrinfo *ai, *a; struct addrinfo hints; + const char *bind_address= my_bind_addr_str; + + if (!bind_address) + bind_address= "0.0.0.0"; sql_print_information("Server hostname (bind-address): '%s'; port: %d", - my_bind_addr_str, mysqld_port); + bind_address, mysqld_port); // Get list of IP-addresses associated with the server hostname. bzero(&hints, sizeof (hints)); @@ -1851,7 +1855,7 @@ static void network_init(void) hints.ai_family= AF_UNSPEC; my_snprintf(port_buf, NI_MAXSERV, "%d", mysqld_port); - if (getaddrinfo(my_bind_addr_str, port_buf, &hints, &ai)) + if (getaddrinfo(bind_address, port_buf, &hints, &ai)) { sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR)); /* purecov: tested */ sql_print_error("Can't start server: cannot resolve hostname!"); @@ -1871,7 +1875,7 @@ static void network_init(void) } sql_print_information(" - '%s' resolves to '%s';", - my_bind_addr_str, ip_addr); + bind_address, ip_addr); } /* From b757c13078ec88bb734c8697a9e3ecd870934f8a Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Fri, 4 May 2012 12:29:49 +0530 Subject: [PATCH 29/39] In perl, to break out of a foreach loop we need to use the keyword "last" and not "break". Fixing the failing test case. 
--- mysql-test/suite/innodb/t/innodb_bug12902967.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/t/innodb_bug12902967.test b/mysql-test/suite/innodb/t/innodb_bug12902967.test index 7bc5727a7a6..e9d832f3c19 100644 --- a/mysql-test/suite/innodb/t/innodb_bug12902967.test +++ b/mysql-test/suite/innodb/t/innodb_bug12902967.test @@ -33,7 +33,7 @@ foreach $line (reverse @lines) { ++$count; print "$line"; if ($count == 2) { - break; + last; } } } From daafaa0f86208fb5ddb13468e79772abef46cbd1 Mon Sep 17 00:00:00 2001 From: Venkata Sidagam Date: Fri, 4 May 2012 18:33:34 +0530 Subject: [PATCH 30/39] Bug #11754178 45740: MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM Problem Statement: ------------------ mysqldump is not having the dump stmts for general_log and slow_log tables. That is because of the fix for Bug#26121. Hence, after dropping the mysql database, and applying the dump by enabling the logging, "'general_log' table not found" errors are logged into the server log file. Analysis: --------- As part of the fix for Bug#26121, we skipped the dumping of tables for general_log and slow_log, because the data dump of those tables are taking LOCKS, which is not allowed for log tables. Fix: ---- We came up with an approach that instead of taking both meta data and data dump information for those tables, take only the meta data dump which doesn't need LOCKS. As part of fixing the issue we came up with below algorithm. Design before fix: 1) mysql database is having tables like db, event,... general_log, ... slow_log... 2) Skip general_log and slow_log while preparing the tables list 3) Take the TL_READ lock on tables which are present in the table list and do 'show create table'. 4) Release the lock. Design with the fix: 1) mysql database is having tables like db, event,... general_log, ... slow_log... 
2) Skip general_log and slow_log while preparing the tables list 3) Explicitly call the 'show create table' for general_log and slow_log 4) Take the TL_READ lock on tables which are present in the table list and do 'show create table'. 5) Release the lock. While taking the meta data dump for general_log and slow_log the "CREATE TABLE" is replaced with "CREATE TABLE IF NOT EXISTS". This is because we skipped "DROP TABLE" for those tables; "DROP TABLE" fails for these tables if logging is enabled. The customer is applying the dump with logging enabled, so if the dump has "DROP TABLE" it will fail. Hence, removed the "DROP TABLE" stmts for those tables. After the fix we could still observe "Table 'mysql.general_log' doesn't exist" errors initially; that is because in the customer scenario they are dropping the mysql database with logging enabled. Hence, those errors are expected. Once we apply the dump which is taken before the "drop database mysql", the errors will not be there. client/mysqldump.c: In get_table_structure() added code to skip the DROP TABLE stmts for general_log and slow_log tables, because when logging is enabled those stmts will fail. And replaced CREATE TABLE with CREATE TABLE IF NOT EXISTS for those tables, just to make sure CREATE stmt for those tables doesn't fail since we removed DROP stmts for those tables. In dump_all_tables_in_db() added code to call get_table_structure() for general_log and slow_log tables.
mysql-test/r/mysqldump.result: Added a test as part of fix for Bug #11754178 mysql-test/t/mysqldump.test: Added a test as part of fix for Bug #11754178 --- client/mysqldump.c | 58 ++++++++++++++++++++++++++++++----- mysql-test/r/mysqldump.result | 42 +++++++++++++++++++++++++ mysql-test/t/mysqldump.test | 29 ++++++++++++++++-- 3 files changed, 118 insertions(+), 11 deletions(-) diff --git a/client/mysqldump.c b/client/mysqldump.c index 8601b533849..dc124f1b335 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -84,6 +84,15 @@ #define IGNORE_DATA 0x01 /* don't dump data for this table */ #define IGNORE_INSERT_DELAYED 0x02 /* table doesn't support INSERT DELAYED */ +/* general_log or slow_log tables under mysql database */ +static inline my_bool general_log_or_slow_log_tables(const char *db, + const char *table) +{ + return (strcmp(db, "mysql") == 0) && + ((strcmp(table, "general_log") == 0) || + (strcmp(table, "slow_log") == 0)); +} + static void add_load_option(DYNAMIC_STRING *str, const char *option, const char *option_value); static ulong find_set(TYPELIB *lib, const char *x, uint length, @@ -2458,6 +2467,7 @@ static uint get_table_structure(char *table, char *db, char *table_type, "TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'"; FILE *sql_file= md_result_file; int len; + my_bool is_log_table; MYSQL_RES *result; MYSQL_ROW row; DBUG_ENTER("get_table_structure"); @@ -2542,9 +2552,12 @@ static uint get_table_structure(char *table, char *db, char *table_type, /* Even if the "table" is a view, we do a DROP TABLE here. The view-specific code below fills in the DROP VIEW. + We will skip the DROP TABLE for general_log and slow_log, since + those stmts will fail, in case we apply dump by enabling logging. 
*/ - fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", - opt_quoted_table); + if (!general_log_or_slow_log_tables(db, table)) + fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", + opt_quoted_table); check_io(sql_file); } @@ -2656,12 +2669,25 @@ static uint get_table_structure(char *table, char *db, char *table_type, row= mysql_fetch_row(result); - fprintf(sql_file, (opt_compatible_mode & 3) ? "%s;\n" : - "/*!40101 SET @saved_cs_client = @@character_set_client */;\n" - "/*!40101 SET character_set_client = utf8 */;\n" - "%s;\n" - "/*!40101 SET character_set_client = @saved_cs_client */;\n", - row[1]); + is_log_table= general_log_or_slow_log_tables(db, table); + if (is_log_table) + row[1]+= 13; /* strlen("CREATE TABLE ")= 13 */ + if (opt_compatible_mode & 3) + { + fprintf(sql_file, + is_log_table ? "CREATE TABLE IF NOT EXISTS %s;\n" : "%s;\n", + row[1]); + } + else + { + fprintf(sql_file, + "/*!40101 SET @saved_cs_client = @@character_set_client */;\n" + "/*!40101 SET character_set_client = utf8 */;\n" + "%s%s;\n" + "/*!40101 SET character_set_client = @saved_cs_client */;\n", + is_log_table ? 
"CREATE TABLE IF NOT EXISTS " : "", + row[1]); + } check_io(sql_file); mysql_free_result(result); @@ -4261,6 +4287,22 @@ static int dump_all_tables_in_db(char *database) if (opt_xml) print_xml_tag(md_result_file, "", "\n", "database", "name=", database, NullS); + if (strcmp(database, "mysql") == 0) + { + char table_type[NAME_LEN]; + char ignore_flag; + uint num_fields; + num_fields= get_table_structure((char *) "general_log", + database, table_type, &ignore_flag); + if (num_fields == 0) + verbose_msg("-- Warning: get_table_structure() failed with some internal " + "error for 'general_log' table\n"); + num_fields= get_table_structure((char *) "slow_log", + database, table_type, &ignore_flag); + if (num_fields == 0) + verbose_msg("-- Warning: get_table_structure() failed with some internal " + "error for 'slow_log' table\n"); + } if (lock_tables) { DYNAMIC_STRING query; diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result index 1d4a5783c3f..fac94e855a4 100644 --- a/mysql-test/r/mysqldump.result +++ b/mysql-test/r/mysqldump.result @@ -5067,5 +5067,47 @@ RETURN CONCAT(']]]]>, ', s, '!') DROP DATABASE BUG52792; USE test; # +# Bug#45740 MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM +# +SET @old_log_output_state= @@global.log_output; +SET @old_general_log_state= @@global.general_log; +SET @old_slow_query_log_state= @@global.slow_query_log; +call mtr.add_suppression("Failed to write to mysql.general_log"); +SET @@global.log_output="TABLE"; +SET @@global.general_log='ON'; +SET @@global.slow_query_log='ON'; +DROP DATABASE mysql; +Warnings: +Error 1146 Table 'mysql.proc' doesn't exist +Error 1146 Table 'mysql.event' doesn't exist +SHOW CREATE TABLE mysql.general_log; +Table Create Table +general_log CREATE TABLE `general_log` ( + `event_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `user_host` mediumtext NOT NULL, + `thread_id` int(11) NOT NULL, + `server_id` int(10) unsigned NOT NULL, + 
`command_type` varchar(64) NOT NULL, + `argument` mediumtext NOT NULL +) ENGINE=CSV DEFAULT CHARSET=utf8 COMMENT='General log' +SHOW CREATE TABLE mysql.slow_log; +Table Create Table +slow_log CREATE TABLE `slow_log` ( + `start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `user_host` mediumtext NOT NULL, + `query_time` time NOT NULL, + `lock_time` time NOT NULL, + `rows_sent` int(11) NOT NULL, + `rows_examined` int(11) NOT NULL, + `db` varchar(512) NOT NULL, + `last_insert_id` int(11) NOT NULL, + `insert_id` int(11) NOT NULL, + `server_id` int(10) unsigned NOT NULL, + `sql_text` mediumtext NOT NULL +) ENGINE=CSV DEFAULT CHARSET=utf8 COMMENT='Slow log' +SET @@global.log_output= @old_log_output_state; +SET @@global.slow_query_log= @old_slow_query_log_state; +SET @@global.general_log= @old_general_log_state; +# # End of 5.1 tests # diff --git a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test index 20c788e3b9a..7393ca88f0a 100644 --- a/mysql-test/t/mysqldump.test +++ b/mysql-test/t/mysqldump.test @@ -2308,9 +2308,32 @@ DROP DATABASE BUG52792; USE test; +# Wait till we reached the initial number of concurrent sessions +--source include/wait_until_count_sessions.inc + +--echo # +--echo # Bug#45740 MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM +--echo # +SET @old_log_output_state= @@global.log_output; +SET @old_general_log_state= @@global.general_log; +SET @old_slow_query_log_state= @@global.slow_query_log; + +call mtr.add_suppression("Failed to write to mysql.general_log"); +--exec $MYSQL_DUMP -uroot --all-databases > $MYSQLTEST_VARDIR/tmp/bug45740.sql +# Make log_output as table and enable general_log and slow_log +SET @@global.log_output="TABLE"; +SET @@global.general_log='ON'; +SET @@global.slow_query_log='ON'; +DROP DATABASE mysql; +--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/bug45740.sql +SHOW CREATE TABLE mysql.general_log; +SHOW CREATE TABLE mysql.slow_log; +--remove_file 
$MYSQLTEST_VARDIR/tmp/bug45740.sql + +SET @@global.log_output= @old_log_output_state; +SET @@global.slow_query_log= @old_slow_query_log_state; +SET @@global.general_log= @old_general_log_state; + --echo # --echo # End of 5.1 tests --echo # - -# Wait till we reached the initial number of concurrent sessions ---source include/wait_until_count_sessions.inc From e7364ec29c522b5d98951b851ca3cc62a9f46d59 Mon Sep 17 00:00:00 2001 From: Venkata Sidagam Date: Mon, 7 May 2012 16:46:44 +0530 Subject: [PATCH 31/39] Bug #11754178 45740: MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM Problem Statement: ------------------ mysqldump is not having the dump stmts for general_log and slow_log tables. That is because of the fix for Bug#26121. Hence, after dropping the mysql database, and applying the dump by enabling the logging, "'general_log' table not found" errors are logged into the server log file. Analysis: --------- As part of the fix for Bug#26121, we skipped the dumping of tables for general_log and slow_log, because the data dump of those tables are taking LOCKS, which is not allowed for log tables. Fix: ---- We came up with an approach that instead of taking both meta data and data dump information for those tables, take only the meta data dump which doesn't need LOCKS. As part of fixing the issue we came up with below algorithm. Design before fix: 1) mysql database is having tables like db, event,... general_log, ... slow_log... 2) Skip general_log and slow_log while preparing the tables list 3) Take the TL_READ lock on tables which are present in the table list and do 'show create table'. 4) Release the lock. Design with the fix: 1) mysql database is having tables like db, event,... general_log, ... slow_log... 2) Skip general_log and slow_log while preparing the tables list 3) Explicitly call the 'show create table' for general_log and slow_log 4) Take the TL_READ lock on tables which are present in the table list and do 'show create table'.
5) Release the lock. While taking the meta data dump for general_log and slow_log the "CREATE TABLE" is replaced with "CREATE TABLE IF NOT EXISTS". This is because we skipped "DROP TABLE" for those tables; "DROP TABLE" fails for these tables if logging is enabled. The customer is applying the dump with logging enabled, so if the dump has "DROP TABLE" it will fail. Hence, removed the "DROP TABLE" stmts for those tables. After the fix we could still observe "Table 'mysql.general_log' doesn't exist" errors initially; that is because in the customer scenario they are dropping the mysql database with logging enabled. Hence, those errors are expected. Once we apply the dump which is taken before the "drop database mysql", the errors will not be there. client/mysqldump.c: In get_table_structure() added code to skip the DROP TABLE stmts for general_log and slow_log tables, because when logging is enabled those stmts will fail. And replaced CREATE TABLE with CREATE TABLE IF NOT EXISTS for those tables, just to make sure CREATE stmt for those tables doesn't fail since we removed DROP stmts for those tables. In dump_all_tables_in_db() added code to call get_table_structure() for general_log and slow_log tables.
mysql-test/r/mysqldump.result: Added a test as part of fix for Bug #11754178 mysql-test/t/mysqldump.test: Added a test as part of fix for Bug #11754178 --- client/mysqldump.c | 58 ++++++++++++++++++++++++++++++----- mysql-test/r/mysqldump.result | 42 +++++++++++++++++++++++++ mysql-test/t/mysqldump.test | 29 ++++++++++++++++-- 3 files changed, 118 insertions(+), 11 deletions(-) diff --git a/client/mysqldump.c b/client/mysqldump.c index 8601b533849..dc124f1b335 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -84,6 +84,15 @@ #define IGNORE_DATA 0x01 /* don't dump data for this table */ #define IGNORE_INSERT_DELAYED 0x02 /* table doesn't support INSERT DELAYED */ +/* general_log or slow_log tables under mysql database */ +static inline my_bool general_log_or_slow_log_tables(const char *db, + const char *table) +{ + return (strcmp(db, "mysql") == 0) && + ((strcmp(table, "general_log") == 0) || + (strcmp(table, "slow_log") == 0)); +} + static void add_load_option(DYNAMIC_STRING *str, const char *option, const char *option_value); static ulong find_set(TYPELIB *lib, const char *x, uint length, @@ -2458,6 +2467,7 @@ static uint get_table_structure(char *table, char *db, char *table_type, "TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'"; FILE *sql_file= md_result_file; int len; + my_bool is_log_table; MYSQL_RES *result; MYSQL_ROW row; DBUG_ENTER("get_table_structure"); @@ -2542,9 +2552,12 @@ static uint get_table_structure(char *table, char *db, char *table_type, /* Even if the "table" is a view, we do a DROP TABLE here. The view-specific code below fills in the DROP VIEW. + We will skip the DROP TABLE for general_log and slow_log, since + those stmts will fail, in case we apply dump by enabling logging. 
*/ - fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", - opt_quoted_table); + if (!general_log_or_slow_log_tables(db, table)) + fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", + opt_quoted_table); check_io(sql_file); } @@ -2656,12 +2669,25 @@ static uint get_table_structure(char *table, char *db, char *table_type, row= mysql_fetch_row(result); - fprintf(sql_file, (opt_compatible_mode & 3) ? "%s;\n" : - "/*!40101 SET @saved_cs_client = @@character_set_client */;\n" - "/*!40101 SET character_set_client = utf8 */;\n" - "%s;\n" - "/*!40101 SET character_set_client = @saved_cs_client */;\n", - row[1]); + is_log_table= general_log_or_slow_log_tables(db, table); + if (is_log_table) + row[1]+= 13; /* strlen("CREATE TABLE ")= 13 */ + if (opt_compatible_mode & 3) + { + fprintf(sql_file, + is_log_table ? "CREATE TABLE IF NOT EXISTS %s;\n" : "%s;\n", + row[1]); + } + else + { + fprintf(sql_file, + "/*!40101 SET @saved_cs_client = @@character_set_client */;\n" + "/*!40101 SET character_set_client = utf8 */;\n" + "%s%s;\n" + "/*!40101 SET character_set_client = @saved_cs_client */;\n", + is_log_table ? 
"CREATE TABLE IF NOT EXISTS " : "", + row[1]); + } check_io(sql_file); mysql_free_result(result); @@ -4261,6 +4287,22 @@ static int dump_all_tables_in_db(char *database) if (opt_xml) print_xml_tag(md_result_file, "", "\n", "database", "name=", database, NullS); + if (strcmp(database, "mysql") == 0) + { + char table_type[NAME_LEN]; + char ignore_flag; + uint num_fields; + num_fields= get_table_structure((char *) "general_log", + database, table_type, &ignore_flag); + if (num_fields == 0) + verbose_msg("-- Warning: get_table_structure() failed with some internal " + "error for 'general_log' table\n"); + num_fields= get_table_structure((char *) "slow_log", + database, table_type, &ignore_flag); + if (num_fields == 0) + verbose_msg("-- Warning: get_table_structure() failed with some internal " + "error for 'slow_log' table\n"); + } if (lock_tables) { DYNAMIC_STRING query; diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result index 1d4a5783c3f..fac94e855a4 100644 --- a/mysql-test/r/mysqldump.result +++ b/mysql-test/r/mysqldump.result @@ -5067,5 +5067,47 @@ RETURN CONCAT(']]]]>, ', s, '!') DROP DATABASE BUG52792; USE test; # +# Bug#45740 MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM +# +SET @old_log_output_state= @@global.log_output; +SET @old_general_log_state= @@global.general_log; +SET @old_slow_query_log_state= @@global.slow_query_log; +call mtr.add_suppression("Failed to write to mysql.general_log"); +SET @@global.log_output="TABLE"; +SET @@global.general_log='ON'; +SET @@global.slow_query_log='ON'; +DROP DATABASE mysql; +Warnings: +Error 1146 Table 'mysql.proc' doesn't exist +Error 1146 Table 'mysql.event' doesn't exist +SHOW CREATE TABLE mysql.general_log; +Table Create Table +general_log CREATE TABLE `general_log` ( + `event_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `user_host` mediumtext NOT NULL, + `thread_id` int(11) NOT NULL, + `server_id` int(10) unsigned NOT NULL, + 
`command_type` varchar(64) NOT NULL, + `argument` mediumtext NOT NULL +) ENGINE=CSV DEFAULT CHARSET=utf8 COMMENT='General log' +SHOW CREATE TABLE mysql.slow_log; +Table Create Table +slow_log CREATE TABLE `slow_log` ( + `start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `user_host` mediumtext NOT NULL, + `query_time` time NOT NULL, + `lock_time` time NOT NULL, + `rows_sent` int(11) NOT NULL, + `rows_examined` int(11) NOT NULL, + `db` varchar(512) NOT NULL, + `last_insert_id` int(11) NOT NULL, + `insert_id` int(11) NOT NULL, + `server_id` int(10) unsigned NOT NULL, + `sql_text` mediumtext NOT NULL +) ENGINE=CSV DEFAULT CHARSET=utf8 COMMENT='Slow log' +SET @@global.log_output= @old_log_output_state; +SET @@global.slow_query_log= @old_slow_query_log_state; +SET @@global.general_log= @old_general_log_state; +# # End of 5.1 tests # diff --git a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test index 20c788e3b9a..7393ca88f0a 100644 --- a/mysql-test/t/mysqldump.test +++ b/mysql-test/t/mysqldump.test @@ -2308,9 +2308,32 @@ DROP DATABASE BUG52792; USE test; +# Wait till we reached the initial number of concurrent sessions +--source include/wait_until_count_sessions.inc + +--echo # +--echo # Bug#45740 MYSQLDUMP DOESN'T DUMP GENERAL_LOG AND SLOW_QUERY CAUSES RESTORE PROBLEM +--echo # +SET @old_log_output_state= @@global.log_output; +SET @old_general_log_state= @@global.general_log; +SET @old_slow_query_log_state= @@global.slow_query_log; + +call mtr.add_suppression("Failed to write to mysql.general_log"); +--exec $MYSQL_DUMP -uroot --all-databases > $MYSQLTEST_VARDIR/tmp/bug45740.sql +# Make log_output as table and enable general_log and slow_log +SET @@global.log_output="TABLE"; +SET @@global.general_log='ON'; +SET @@global.slow_query_log='ON'; +DROP DATABASE mysql; +--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/bug45740.sql +SHOW CREATE TABLE mysql.general_log; +SHOW CREATE TABLE mysql.slow_log; +--remove_file 
$MYSQLTEST_VARDIR/tmp/bug45740.sql + +SET @@global.log_output= @old_log_output_state; +SET @@global.slow_query_log= @old_slow_query_log_state; +SET @@global.general_log= @old_general_log_state; + --echo # --echo # End of 5.1 tests --echo # - -# Wait till we reached the initial number of concurrent sessions ---source include/wait_until_count_sessions.inc From 391ea219c21a66e24ea9985ca2581f75bbd7c8f5 Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Thu, 10 May 2012 10:18:31 +0530 Subject: [PATCH 32/39] Bug #14007649 65111: INNODB SOMETIMES FAILS TO UPDATE ROWS INSERTED BY A CONCURRENT TRANSACTIO The member function QUICK_RANGE_SELECT::init_ror_merged_scan() performs a table handler clone. Innodb does not provide a clone operation. The ha_innobase::clone() is not there. The handler::clone() does not take care of the ha_innobase->prebuilt->select_lock_type. Because of this what happens is that for one index we do a locking read, and for the other index we were doing a non-locking (consistent) read. The patch introduces ha_innobase::clone() member function. It is implemented similar to ha_myisam::clone(). It calls the base class handler::clone() and then does any additional operation required. I am setting the ha_innobase->prebuilt->select_lock_type correctly. 
rb://1060 approved by Marko --- .../suite/innodb/r/innodb_bug14007649.result | 56 ++++++++++++++++++ .../suite/innodb/t/innodb_bug14007649.test | 58 +++++++++++++++++++ .../innodb_plugin/r/innodb_bug14007649.result | 56 ++++++++++++++++++ .../innodb_plugin/t/innodb_bug14007649.test | 58 +++++++++++++++++++ storage/innobase/btr/btr0cur.c | 2 + storage/innobase/handler/ha_innodb.cc | 24 ++++++++ storage/innobase/handler/ha_innodb.h | 1 + storage/innodb_plugin/btr/btr0cur.c | 2 + storage/innodb_plugin/handler/ha_innodb.cc | 25 ++++++++ storage/innodb_plugin/handler/ha_innodb.h | 1 + 10 files changed, 283 insertions(+) create mode 100644 mysql-test/suite/innodb/r/innodb_bug14007649.result create mode 100644 mysql-test/suite/innodb/t/innodb_bug14007649.test create mode 100644 mysql-test/suite/innodb_plugin/r/innodb_bug14007649.result create mode 100644 mysql-test/suite/innodb_plugin/t/innodb_bug14007649.test diff --git a/mysql-test/suite/innodb/r/innodb_bug14007649.result b/mysql-test/suite/innodb/r/innodb_bug14007649.result new file mode 100644 index 00000000000..1f802dcd146 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug14007649.result @@ -0,0 +1,56 @@ +create table t1 ( +rowid int, +f1 int, +f2 int, +key i1 (f1, f2), +key i2 (f2)) engine=innodb; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `rowid` int(11) DEFAULT NULL, + `f1` int(11) DEFAULT NULL, + `f2` int(11) DEFAULT NULL, + KEY `i1` (`f1`,`f2`), + KEY `i2` (`f2`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +insert into `t1` (rowid, f1, f2) values (1, 1, 10), (2, 1, NULL); +start transaction with consistent snapshot; +start transaction; +update t1 set f2 = 4 where f1 = 1 and f2 is null; +(b) Number of rows updated: +select row_count(); +row_count() +1 +insert into t1 values (3, 1, null); +(b) After update and insert query. +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 4 +3 1 NULL +commit; +(a) Before the update statement is executed. 
+select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 NULL +SET SESSION debug="+d,bug14007649"; +update t1 set f2 = 6 where f1 = 1 and f2 is null; +(a) Number of rows updated: +select row_count(); +row_count() +1 +(a) After the update statement is executed. +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 NULL +3 1 6 +commit; +"The trx with consistent snapshot ended." +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 4 +3 1 6 +drop table t1; diff --git a/mysql-test/suite/innodb/t/innodb_bug14007649.test b/mysql-test/suite/innodb/t/innodb_bug14007649.test new file mode 100644 index 00000000000..2832bd41f3c --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug14007649.test @@ -0,0 +1,58 @@ +--source include/have_innodb.inc +--source include/have_debug.inc + +create table t1 ( + rowid int, + f1 int, + f2 int, + key i1 (f1, f2), + key i2 (f2)) engine=innodb; + +show create table t1; +insert into `t1` (rowid, f1, f2) values (1, 1, 10), (2, 1, NULL); + +connect (a,localhost,root,,); +connect (b,localhost,root,,); + +connection a; +start transaction with consistent snapshot; + +connection b; +start transaction; +update t1 set f2 = 4 where f1 = 1 and f2 is null; + +-- echo (b) Number of rows updated: +select row_count(); + +insert into t1 values (3, 1, null); + +-- echo (b) After update and insert query. +select rowid, f1, f2 from t1; + +commit; + +connection a; + +-- echo (a) Before the update statement is executed. +select rowid, f1, f2 from t1; + +SET SESSION debug="+d,bug14007649"; +update t1 set f2 = 6 where f1 = 1 and f2 is null; + +-- echo (a) Number of rows updated: +select row_count(); + +-- echo (a) After the update statement is executed. +select rowid, f1, f2 from t1; + +commit; + +--echo "The trx with consistent snapshot ended." 
+ +select rowid, f1, f2 from t1; + +connection default; +disconnect a; +disconnect b; + +drop table t1; diff --git a/mysql-test/suite/innodb_plugin/r/innodb_bug14007649.result b/mysql-test/suite/innodb_plugin/r/innodb_bug14007649.result new file mode 100644 index 00000000000..1f802dcd146 --- /dev/null +++ b/mysql-test/suite/innodb_plugin/r/innodb_bug14007649.result @@ -0,0 +1,56 @@ +create table t1 ( +rowid int, +f1 int, +f2 int, +key i1 (f1, f2), +key i2 (f2)) engine=innodb; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `rowid` int(11) DEFAULT NULL, + `f1` int(11) DEFAULT NULL, + `f2` int(11) DEFAULT NULL, + KEY `i1` (`f1`,`f2`), + KEY `i2` (`f2`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +insert into `t1` (rowid, f1, f2) values (1, 1, 10), (2, 1, NULL); +start transaction with consistent snapshot; +start transaction; +update t1 set f2 = 4 where f1 = 1 and f2 is null; +(b) Number of rows updated: +select row_count(); +row_count() +1 +insert into t1 values (3, 1, null); +(b) After update and insert query. +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 4 +3 1 NULL +commit; +(a) Before the update statement is executed. +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 NULL +SET SESSION debug="+d,bug14007649"; +update t1 set f2 = 6 where f1 = 1 and f2 is null; +(a) Number of rows updated: +select row_count(); +row_count() +1 +(a) After the update statement is executed. +select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 NULL +3 1 6 +commit; +"The trx with consistent snapshot ended." 
+select rowid, f1, f2 from t1; +rowid f1 f2 +1 1 10 +2 1 4 +3 1 6 +drop table t1; diff --git a/mysql-test/suite/innodb_plugin/t/innodb_bug14007649.test b/mysql-test/suite/innodb_plugin/t/innodb_bug14007649.test new file mode 100644 index 00000000000..e1d27ecd742 --- /dev/null +++ b/mysql-test/suite/innodb_plugin/t/innodb_bug14007649.test @@ -0,0 +1,58 @@ +--source include/have_innodb_plugin.inc +--source include/have_debug.inc + +create table t1 ( + rowid int, + f1 int, + f2 int, + key i1 (f1, f2), + key i2 (f2)) engine=innodb; + +show create table t1; +insert into `t1` (rowid, f1, f2) values (1, 1, 10), (2, 1, NULL); + +connect (a,localhost,root,,); +connect (b,localhost,root,,); + +connection a; +start transaction with consistent snapshot; + +connection b; +start transaction; +update t1 set f2 = 4 where f1 = 1 and f2 is null; + +-- echo (b) Number of rows updated: +select row_count(); + +insert into t1 values (3, 1, null); + +-- echo (b) After update and insert query. +select rowid, f1, f2 from t1; + +commit; + +connection a; + +-- echo (a) Before the update statement is executed. +select rowid, f1, f2 from t1; + +SET SESSION debug="+d,bug14007649"; +update t1 set f2 = 6 where f1 = 1 and f2 is null; + +-- echo (a) Number of rows updated: +select row_count(); + +-- echo (a) After the update statement is executed. +select rowid, f1, f2 from t1; + +commit; + +--echo "The trx with consistent snapshot ended." 
+ +select rowid, f1, f2 from t1; + +connection default; +disconnect a; +disconnect b; + +drop table t1; diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index c09d6408fa0..4678ea8cd22 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -2792,6 +2792,8 @@ btr_estimate_n_rows_in_range( n_rows = n_rows * 2; } + DBUG_EXECUTE_IF("bug14007649", return(n_rows);); + /* Do not estimate the number of rows in the range to over 1 / 2 of the estimated rows in the whole table */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6576016501f..8ed082037f0 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3180,6 +3180,30 @@ table_opened: DBUG_RETURN(0); } +handler* +ha_innobase::clone( +/*===============*/ + const char* name, /*!< in: table name */ + MEM_ROOT* mem_root) /*!< in: memory context */ +{ + ha_innobase* new_handler; + + DBUG_ENTER("ha_innobase::clone"); + + new_handler = static_cast<ha_innobase*>(handler::clone(name, + mem_root)); + if (new_handler) { + DBUG_ASSERT(new_handler->prebuilt != NULL); + DBUG_ASSERT(new_handler->user_thd == user_thd); + DBUG_ASSERT(new_handler->prebuilt->trx == prebuilt->trx); + + new_handler->prebuilt->select_lock_type + = prebuilt->select_lock_type; + } + + DBUG_RETURN(new_handler); +} + uint ha_innobase::max_supported_key_part_length() const { diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 8b91f7d4c51..93229ad9185 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -117,6 +117,7 @@ class ha_innobase: public handler const key_map *keys_to_use_for_scanning() { return &key_map_full; } int open(const char *name, int mode, uint test_if_locked); + handler* clone(const char *name, MEM_ROOT *mem_root); int close(void); double scan_time(); double read_time(uint index, uint ranges, ha_rows rows); diff --git
a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index b67da53ec8a..223b976dea7 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -3194,6 +3194,8 @@ btr_estimate_n_rows_in_range( n_rows = n_rows * 2; } + DBUG_EXECUTE_IF("bug14007649", return(n_rows);); + /* Do not estimate the number of rows in the range to over 1 / 2 of the estimated rows in the whole table */ diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index ca6788999ae..2207f9d009d 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -3889,6 +3889,31 @@ table_opened: DBUG_RETURN(0); } +UNIV_INTERN +handler* +ha_innobase::clone( +/*===============*/ + const char* name, /*!< in: table name */ + MEM_ROOT* mem_root) /*!< in: memory context */ +{ + ha_innobase* new_handler; + + DBUG_ENTER("ha_innobase::clone"); + + new_handler = static_cast<ha_innobase*>(handler::clone(name, + mem_root)); + if (new_handler) { + DBUG_ASSERT(new_handler->prebuilt != NULL); + DBUG_ASSERT(new_handler->user_thd == user_thd); + DBUG_ASSERT(new_handler->prebuilt->trx == prebuilt->trx); + + new_handler->prebuilt->select_lock_type + = prebuilt->select_lock_type; + } + + DBUG_RETURN(new_handler); +} + UNIV_INTERN uint ha_innobase::max_supported_key_part_length() const diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h index f7a5456b1a7..15cc4a77b6f 100644 --- a/storage/innodb_plugin/handler/ha_innodb.h +++ b/storage/innodb_plugin/handler/ha_innodb.h @@ -132,6 +132,7 @@ class ha_innobase: public handler const key_map* keys_to_use_for_scanning(); int open(const char *name, int mode, uint test_if_locked); + handler* clone(const char *name, MEM_ROOT *mem_root); int close(void); double scan_time(); double read_time(uint index, uint ranges, ha_rows rows); From 38c4ef63b862fd59b4285fc3bfd9f5dff536649d Mon Sep 17 00:00:00 2001 From: 
Sergei Golubchik Date: Wed, 6 Jun 2012 14:15:29 +0200 Subject: [PATCH 33/39] MDEV-302 lp:988204 MariaDB 5.5.23 binaries don't use libaio simplify debian/dist/*/rules slightly. move hard-coded config value to cmake files. (the actual fix is -DBUILD_CONFIG=mysql_release) --- CMakeLists.txt | 4 ++++ .../build_configurations/mysql_release.cmake | 15 ++++++++++-- debian/dist/Debian/rules | 23 ++----------------- debian/dist/Ubuntu/rules | 23 ++----------------- 4 files changed, 21 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b0c4898785..2599a025e27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,10 @@ IF (NOT CPACK_GENERATOR) ENDIF(WIN32) ENDIF(NOT CPACK_GENERATOR) +IF(DEB) + SET(INSTALL_LAYOUT "DEB") +ENDIF(DEB) + INCLUDE(mysql_version) INCLUDE(cpack_rpm) INCLUDE(cpack_source_ignore_files) diff --git a/cmake/build_configurations/mysql_release.cmake b/cmake/build_configurations/mysql_release.cmake index 87fe47cac12..5b2596491ad 100644 --- a/cmake/build_configurations/mysql_release.cmake +++ b/cmake/build_configurations/mysql_release.cmake @@ -94,8 +94,19 @@ IF(FEATURE_SET) ENDIF() OPTION(ENABLED_LOCAL_INFILE "" ON) -SET(WITH_SSL bundled CACHE STRING "") -SET(WITH_ZLIB bundled CACHE STRING "") +IF(RPM) + SET(WITH_SSL system CACHE STRING "") + SET(WITH_ZLIB system CACHE STRING "") +ELSEIF(DEB) + SET(WITH_SSL system CACHE STRING "") + SET(WITH_ZLIB system CACHE STRING "") + SET(WITH_LIBWRAP ON) + SET(WITH_MAX ON) + SET(HAVE_EMBEDDED_PRIVILEGE_CONTROL ON) +ELSE() + SET(WITH_SSL bundled CACHE STRING "") + SET(WITH_ZLIB bundled CACHE STRING "") +ENDIF() IF(NOT COMPILATION_COMMENT) SET(COMPILATION_COMMENT "MariaDB Server") diff --git a/debian/dist/Debian/rules b/debian/dist/Debian/rules index 5cb22ae60d6..96ad527542d 100755 --- a/debian/dist/Debian/rules +++ b/debian/dist/Debian/rules @@ -70,30 +70,11 @@ endif CXX=$${MYSQL_BUILD_CXX:-g++} \ CXXFLAGS=$${MYSQL_BUILD_CXXFLAGS:-"-O2 -fno-omit-frame-pointer -g -pipe -Wall 
-Wno-uninitialized"} \ cmake .. \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DINSTALL_SBINDIR=sbin \ - -DMYSQL_DATADIR=/var/lib/mysql \ - -DINSTALL_INCLUDEDIR=include/mysql \ - -DINSTALL_INFODIR=share/info \ - -DINSTALL_MANDIR=share/man \ - \ -DMYSQL_SERVER_SUFFIX="-$(DEBVERSION)" \ + -DBUILD_CONFIG=mysql_release \ -DCOMPILATION_COMMENT="mariadb.org binary distribution" \ -DSYSTEM_TYPE="debian-linux-gnu" \ - -DINSTALL_LAYOUT=DEB \ - \ - -DENABLED_LOCAL_INFILE=1 \ - -DWITH_FAST_MUTEXES=1 \ - \ - -DMYSQL_UNIX_ADDR=/var/run/mysqld/mysqld.sock \ - \ - -DEXTRA_CHARSETS=all \ - -DWITH_LIBWRAP=1 \ - -DWITH_SSL=system \ - -DWITH_ZLIB=system \ - -DWITH_EMBEDDED_SERVER=1 \ - -DHAVE_EMBEDDED_PRIVILEGE_CONTROL=ON \ - -DWITH_MAX=1' + -DDEB=debian' touch $@ diff --git a/debian/dist/Ubuntu/rules b/debian/dist/Ubuntu/rules index 592b2e4d7d1..bb413ad83cc 100755 --- a/debian/dist/Ubuntu/rules +++ b/debian/dist/Ubuntu/rules @@ -70,30 +70,11 @@ endif CXX=$${MYSQL_BUILD_CXX:-g++} \ CXXFLAGS=$${MYSQL_BUILD_CXXFLAGS:-"-O2 -fno-omit-frame-pointer -g -pipe -Wall -Wno-uninitialized"} \ cmake .. 
\ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DINSTALL_SBINDIR=sbin \ - -DMYSQL_DATADIR=/var/lib/mysql \ - -DINSTALL_INCLUDEDIR=include/mysql \ - -DINSTALL_INFODIR=share/info \ - -DINSTALL_MANDIR=share/man \ - \ -DMYSQL_SERVER_SUFFIX="-$(DEBVERSION)" \ + -DBUILD_CONFIG=mysql_release \ -DCOMPILATION_COMMENT="mariadb.org binary distribution" \ -DSYSTEM_TYPE="debian-linux-gnu" \ - -DINSTALL_LAYOUT=DEB \ - \ - -DENABLED_LOCAL_INFILE=1 \ - -DWITH_FAST_MUTEXES=1 \ - \ - -DMYSQL_UNIX_ADDR=/var/run/mysqld/mysqld.sock \ - \ - -DEXTRA_CHARSETS=all \ - -DWITH_LIBWRAP=1 \ - -DWITH_SSL=system \ - -DWITH_ZLIB=system \ - -DWITH_EMBEDDED_SERVER=1 \ - -DHAVE_EMBEDDED_PRIVILEGE_CONTROL=ON \ - -DWITH_MAX=1' + -DDEB=ubuntu' touch $@ From 1838938671197b0ae40bed0a99ea62dbf4be2595 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 6 Jun 2012 16:41:13 +0200 Subject: [PATCH 34/39] fixes for bintar mtr failures: look for plugins in the correct path. skip --plugin-load if it has the empty soname part, not only if the whole argument is empty. --- mysql-test/mysql-test-run.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index ebe7af3641d..0f9006cad69 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2717,6 +2717,7 @@ sub setup_vardir() { # hm, what paths work for debs and for rpms ? 
for (<$bindir/lib64/mysql/plugin/*.so>, <$bindir/lib/mysql/plugin/*.so>, + <$bindir/lib/plugin/*.so>, # bintar <$bindir/lib/plugin/*.dll>) { my $pname=basename($_); @@ -5292,6 +5293,7 @@ sub mysqld_arguments ($$$) { } elsif ($plugin = mtr_match_prefix($arg, "--plugin-load=")) { + next if $plugin =~ /=$/; push @plugins, $plugin unless $seen{$plugin}; $seen{$plugin} = 1; } From 7fcfdf7c16f44aed7eab293e020e7a5ee12ccb12 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 7 Jun 2012 19:15:41 +0200 Subject: [PATCH 35/39] client's option is default-character-set, server's is character-set-server --- debian/additions/mariadb.cnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/additions/mariadb.cnf b/debian/additions/mariadb.cnf index e3803c4aaeb..927e9d1ce88 100644 --- a/debian/additions/mariadb.cnf +++ b/debian/additions/mariadb.cnf @@ -3,7 +3,7 @@ [client] # Default is Latin1, if you need UTF-8 set this (also in server section) -#character-set-server = utf8 +#default-character-set = utf8 [mysqld] # From d2ca6d2e7ff028f6abbab52e36530046b3f84a49 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 8 Jun 2012 14:50:50 +0200 Subject: [PATCH 36/39] apply mysql fix for bug#58421 to XtraDB --- mysql-test/mysql-test-run.pl | 14 +---- storage/xtradb/os/os0file.c | 99 +++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 14 deletions(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 0f9006cad69..2e02ab759ee 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -3464,12 +3464,6 @@ sub mysql_install_db { mtr_add_arg($args, "--lc-messages-dir=%s", $install_lang); mtr_add_arg($args, "--character-sets-dir=%s", $install_chsdir); - # On some old linux kernels, aio on tmpfs is not supported - # Remove this if/when Bug #58421 fixes this in the server - if ($^O eq "linux" && $opt_mem) { - mtr_add_arg($args, "--loose-skip-innodb-use-native-aio"); - } - # InnoDB arguments that 
affect file location and sizes may # need to be given to the bootstrap process as well as the # server process. @@ -4743,6 +4737,7 @@ sub extract_warning_lines ($$) { qr|Access denied for user|, qr|Aborted connection|, qr|table.*is full|, + qr|Linux Native AIO|, # warning that aio does not work on /dev/shm ); my $matched_lines= []; @@ -5246,13 +5241,6 @@ sub mysqld_arguments ($$$) { mtr_add_arg($args, "--user=root"); } - # On some old linux kernels, aio on tmpfs is not supported - # Remove this if/when Bug #58421 fixes this in the server - if ($^O eq "linux" && $opt_mem) - { - mtr_add_arg($args, "--loose-skip-innodb-use-native-aio"); - } - if (!using_extern() and !$opt_user_args) { # Turn on logging to file diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 0c9b116b932..eefe17df740 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -3318,7 +3318,91 @@ retry: fprintf(stderr, "InnoDB: You can disable Linux Native AIO by" - " setting innodb_native_aio = off in my.cnf\n"); + " setting innodb_use_native_aio = 0 in my.cnf\n"); + return(FALSE); +} + +/******************************************************************//** +Checks if the system supports native linux aio. On some kernel +versions where native aio is supported it won't work on tmpfs. In such +cases we can't use native aio as it is not possible to mix simulated +and native aio. +@return: TRUE if supported, FALSE otherwise. */ +static +ibool +os_aio_native_aio_supported(void) +/*=============================*/ +{ + int fd; + byte* buf; + byte* ptr; + struct io_event io_event; + io_context_t io_ctx; + struct iocb iocb; + struct iocb* p_iocb; + int err; + + if (!os_aio_linux_create_io_ctx(1, &io_ctx)) { + /* The platform does not support native aio. */ + return(FALSE); + } + + /* Now check if tmpdir supports native aio ops. 
*/ + fd = innobase_mysql_tmpfile(); + + if (fd < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: unable to create " + "temp file to check native AIO support.\n"); + + return(FALSE); + } + + memset(&io_event, 0x0, sizeof(io_event)); + + buf = (byte*) ut_malloc(UNIV_PAGE_SIZE * 2); + ptr = (byte*) ut_align(buf, UNIV_PAGE_SIZE); + + /* Suppress valgrind warning. */ + memset(buf, 0x00, UNIV_PAGE_SIZE * 2); + + memset(&iocb, 0x0, sizeof(iocb)); + p_iocb = &iocb; + io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0); + + err = io_submit(io_ctx, 1, &p_iocb); + if (err >= 1) { + /* Now collect the submitted IO request. */ + err = io_getevents(io_ctx, 1, 1, &io_event, NULL); + } + + ut_free(buf); + close(fd); + + switch (err) { + case 1: + return(TRUE); + + case -EINVAL: + case -ENOSYS: + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Linux Native AIO is not" + " supported on tmpdir.\n" + "InnoDB: You can either move tmpdir to a" + " file system that supports native AIO\n" + "InnoDB: or you can set" + " innodb_use_native_aio to FALSE to avoid" + " this message.\n"); + + /* fall through. 
*/ + default: + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Linux Native AIO check" + " on tmpdir returned error[%d]\n", -err); + } + return(FALSE); } #endif /* LINUX_NATIVE_AIO */ @@ -3458,6 +3542,19 @@ os_aio_init( os_io_init_simple(); +#if defined(LINUX_NATIVE_AIO) + /* Check if native aio is supported on this system and tmpfs */ + if (srv_use_native_aio + && !os_aio_native_aio_supported()) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: Linux Native AIO" + " disabled.\n"); + srv_use_native_aio = FALSE; + } +#endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < n_segments; i++) { srv_set_io_thread_op_info(i, "not started yet"); } From 6de579241ed0bf01dfad1a7cede9fde36a1c5c03 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 15 Jun 2012 10:23:33 +0200 Subject: [PATCH 37/39] XtraDB 1.1.8-20.1 from Percona-Server 5.5.24-rel26.0 --- btr/btr0btr.c | 315 ++++++++++++++++++-------- btr/btr0cur.c | 264 +++++++++++++++------- btr/btr0pcur.c | 87 +++++-- btr/btr0sea.c | 8 +- buf/buf0buf.c | 81 ++++--- buf/buf0flu.c | 19 +- buf/buf0lru.c | 477 +++++++++++++++++++++++++++++++-------- dict/dict0boot.c | 221 +++++++++++++----- dict/dict0dict.c | 42 +++- dict/dict0load.c | 42 ++-- fil/fil0fil.c | 108 +++++---- fsp/fsp0fsp.c | 440 ++++++++++++++++++++---------------- handler/ha_innodb.cc | 455 +++++++++++++++++++++++++++---------- handler/ha_innodb.h | 13 +- handler/handler0alter.cc | 21 +- ibuf/ibuf0ibuf.c | 130 +++++++---- include/btr0btr.h | 58 ++++- include/btr0btr.ic | 2 +- include/btr0cur.h | 88 ++++---- include/btr0cur.ic | 29 ++- include/btr0pcur.h | 10 + include/buf0buf.h | 52 ++++- include/buf0buf.ic | 21 +- include/buf0lru.h | 11 +- include/buf0types.h | 9 + include/data0data.h | 23 +- include/data0data.ic | 53 ++++- include/db0err.h | 8 +- include/dict0boot.h | 20 ++ include/dict0dict.h | 14 +- include/dict0dict.ic | 21 +- include/dict0mem.h | 10 +- include/fil0fil.h | 19 +- include/fsp0fsp.h | 40 ++-- 
include/log0log.h | 6 +- include/mem0mem.ic | 4 - include/mtr0log.ic | 3 +- include/mtr0mtr.h | 13 +- include/mtr0mtr.ic | 11 +- include/page0page.h | 43 +++- include/page0page.ic | 38 +++- include/row0mysql.h | 8 +- include/row0sel.h | 7 +- include/srv0srv.h | 18 +- include/sync0rw.h | 18 ++ include/sync0rw.ic | 8 +- include/sync0sync.h | 10 +- include/trx0purge.h | 4 +- include/trx0rec.ic | 7 +- include/trx0rseg.ic | 9 +- include/trx0sys.h | 6 + include/trx0undo.h | 13 +- include/univ.i | 33 +-- include/ut0mem.h | 31 +-- include/ut0rnd.ic | 2 +- lock/lock0lock.c | 141 +++++++----- log/log0log.c | 175 ++++++++++---- mem/mem0pool.c | 6 +- mtr/mtr0mtr.c | 6 +- os/os0file.c | 99 +++++++- os/os0proc.c | 3 - page/page0cur.c | 23 +- page/page0page.c | 73 +++--- pars/pars0pars.c | 2 +- row/row0ins.c | 90 +++++++- row/row0merge.c | 127 +++++++---- row/row0mysql.c | 156 ++++++++++--- row/row0row.c | 25 +- row/row0sel.c | 36 +-- row/row0umod.c | 3 +- row/row0upd.c | 65 ++++-- srv/srv0srv.c | 109 ++++++--- srv/srv0start.c | 11 +- sync/sync0rw.c | 10 +- sync/sync0sync.c | 16 +- trx/trx0purge.c | 5 +- trx/trx0rec.c | 42 ++-- trx/trx0sys.c | 62 +++-- trx/trx0trx.c | 2 + trx/trx0undo.c | 43 ++-- ut/ut0mem.c | 82 +------ ut/ut0ut.c | 8 +- 82 files changed, 3318 insertions(+), 1605 deletions(-) diff --git a/btr/btr0btr.c b/btr/btr0btr.c index 433b062bcec..1fa6df44f7c 100644 --- a/btr/btr0btr.c +++ b/btr/btr0btr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -42,6 +42,28 @@ Created 6/2/1994 Heikki Tuuri #include "ibuf0ibuf.h" #include "trx0trx.h" +/**************************************************************//** +Report that an index page is corrupted. */ +UNIV_INTERN +void +btr_corruption_report( +/*==================*/ + const buf_block_t* block, /*!< in: corrupted block */ + const dict_index_t* index) /*!< in: index tree */ +{ + fprintf(stderr, "InnoDB: flag mismatch in space %u page %u" + " index %s of table %s\n", + (unsigned) buf_block_get_space(block), + (unsigned) buf_block_get_page_no(block), + index->name, index->table_name); + if (block->page.zip.data) { + buf_page_print(block->page.zip.data, + buf_block_get_zip_size(block), + BUF_PAGE_PRINT_NO_CRASH); + } + buf_page_print(buf_block_get_frame(block), 0, 0); +} + #ifdef UNIV_BLOB_DEBUG # include "srv0srv.h" # include "ut0rbt.h" @@ -664,6 +686,12 @@ btr_root_fseg_validate( { ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET); + if (UNIV_UNLIKELY(srv_pass_corrupt_table)) { + return (mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space) + && (offset >= FIL_PAGE_DATA) + && (offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); + } + ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space); ut_a(offset >= FIL_PAGE_DATA); ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); @@ -698,12 +726,22 @@ btr_root_block_get( } ut_a(block); - ut_a((ibool)!!page_is_comp(buf_block_get_frame(block)) - == dict_table_is_comp(index->table)); + btr_assert_not_corrupted(block, index); #ifdef UNIV_BTR_DEBUG if (!dict_index_is_ibuf(index)) { const page_t* root = 
buf_block_get_frame(block); + if (UNIV_UNLIKELY(srv_pass_corrupt_table)) { + if (!btr_root_fseg_validate(FIL_PAGE_DATA + + PAGE_BTR_SEG_LEAF + + root, space)) + return(NULL); + if (!btr_root_fseg_validate(FIL_PAGE_DATA + + PAGE_BTR_SEG_TOP + + root, space)) + return(NULL); + return(block); + } ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF + root, space)); ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP @@ -912,28 +950,31 @@ btr_page_alloc_for_ibuf( /**************************************************************//** Allocates a new file page to be used in an index tree. NOTE: we assume that the caller has made the reservation for free extents! -@return new allocated block, x-latched; NULL if out of space */ -UNIV_INTERN +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ +static __attribute__((nonnull, warn_unused_result)) buf_block_t* -btr_page_alloc( -/*===========*/ +btr_page_alloc_low( +/*===============*/ dict_index_t* index, /*!< in: index */ ulint hint_page_no, /*!< in: hint of a good page */ byte file_direction, /*!< in: direction where a possible page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mtr or another + mini-transaction in which the + page should be initialized. + If init_mtr!=mtr, but the page + is already X-latched in mtr, do + not initialize the page. 
*/ { fseg_header_t* seg_header; page_t* root; - buf_block_t* new_block; - ulint new_page_no; - - if (dict_index_is_ibuf(index)) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } root = btr_root_get(index, mtr); @@ -947,45 +988,81 @@ btr_page_alloc( reservation for free extents, and thus we know that a page can be allocated: */ - new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, - file_direction, TRUE, mtr); - if (new_page_no == FIL_NULL) { + return(fseg_alloc_free_page_general( + seg_header, hint_page_no, file_direction, + TRUE, mtr, init_mtr)); +} - return(NULL); +/**************************************************************//** +Allocates a new file page to be used in an index tree. NOTE: we assume +that the caller has made the reservation for free extents! +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ +UNIV_INTERN +buf_block_t* +btr_page_alloc( +/*===========*/ + dict_index_t* index, /*!< in: index */ + ulint hint_page_no, /*!< in: hint of a good page */ + byte file_direction, /*!< in: direction where a possible + page split is made */ + ulint level, /*!< in: level where the page is placed + in the tree */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mini-transaction + for x-latching and initializing + the page */ +{ + buf_block_t* new_block; + + if (dict_index_is_ibuf(index)) { + + return(btr_page_alloc_for_ibuf(index, mtr)); } - new_block = buf_page_get(dict_index_get_space(index), - dict_table_zip_size(index->table), - new_page_no, RW_X_LATCH, mtr); - buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); + new_block = btr_page_alloc_low( + index, hint_page_no, file_direction, level, mtr, init_mtr); + + if (new_block) { + buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); + } 
return(new_block); } /**************************************************************//** Gets the number of pages in a B-tree. -@return number of pages */ +@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ UNIV_INTERN ulint btr_get_size( /*=========*/ dict_index_t* index, /*!< in: index */ - ulint flag) /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ + ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ + mtr_t* mtr) /*!< in/out: mini-transaction where index + is s-latched */ { fseg_header_t* seg_header; page_t* root; ulint n; ulint dummy; - mtr_t mtr; - mtr_start(&mtr); + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK)); - mtr_s_lock(dict_index_get_lock(index), &mtr); + if (index->page == FIL_NULL + || index->to_be_dropped + || *index->name == TEMP_INDEX_PREFIX) { + return(ULINT_UNDEFINED); + } - root = btr_root_get(index, &mtr); + root = btr_root_get(index, mtr); if (srv_pass_corrupt_table && !root) { - mtr_commit(&mtr); + mtr_commit(mtr); return(0); } ut_a(root); @@ -993,22 +1070,20 @@ btr_get_size( if (flag == BTR_N_LEAF_PAGES) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - fseg_n_reserved_pages(seg_header, &n, &mtr); + fseg_n_reserved_pages(seg_header, &n, mtr); } else if (flag == BTR_TOTAL_SIZE) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - n = fseg_n_reserved_pages(seg_header, &dummy, &mtr); + n = fseg_n_reserved_pages(seg_header, &dummy, mtr); seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - n += fseg_n_reserved_pages(seg_header, &dummy, &mtr); + n += fseg_n_reserved_pages(seg_header, &dummy, mtr); } else { ut_error; } - mtr_commit(&mtr); - return(n); } @@ -1090,10 +1165,10 @@ btr_page_free( buf_block_t* block, /*!< in: block to be freed, x-latched */ mtr_t* mtr) /*!< in: mtr */ { - ulint level; - - level = btr_page_get_level(buf_block_get_frame(block), mtr); + const page_t* page = buf_block_get_frame(block); + ulint level = btr_page_get_level(page, mtr); + 
ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); btr_page_free_low(index, block, level, mtr); } @@ -1207,9 +1282,11 @@ btr_page_get_father_node_ptr_func( != page_no)) { rec_t* print_rec; fputs("InnoDB: Dump of the child page:\n", stderr); - buf_page_print(page_align(user_rec), 0); + buf_page_print(page_align(user_rec), 0, + BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: Dump of the parent page:\n", stderr); - buf_page_print(page_align(node_ptr), 0); + buf_page_print(page_align(node_ptr), 0, + BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: Corruption of an index tree: table ", stderr); ut_print_name(stderr, NULL, TRUE, index->table_name); @@ -1333,16 +1410,12 @@ btr_create( /* Allocate then the next page to the segment: it will be the tree root page */ - page_no = fseg_alloc_free_page(buf_block_get_frame( - ibuf_hdr_block) - + IBUF_HEADER - + IBUF_TREE_SEG_HEADER, - IBUF_TREE_ROOT_PAGE_NO, - FSP_UP, mtr); - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); - - block = buf_page_get(space, zip_size, page_no, - RW_X_LATCH, mtr); + block = fseg_alloc_free_page( + buf_block_get_frame(ibuf_hdr_block) + + IBUF_HEADER + IBUF_TREE_SEG_HEADER, + IBUF_TREE_ROOT_PAGE_NO, + FSP_UP, mtr); + ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO); } else { #ifdef UNIV_BLOB_DEBUG if ((type & DICT_CLUSTERED) && !index->blobs) { @@ -1562,7 +1635,7 @@ btr_page_reorganize_low( ibool success = FALSE; ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); + btr_assert_not_corrupted(block, index); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page)); #endif /* UNIV_ZIP_DEBUG */ @@ -1664,8 +1737,8 @@ btr_page_reorganize_low( if (UNIV_UNLIKELY(data_size1 != data_size2) || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) { - buf_page_print(page, 0); - buf_page_print(temp_page, 0); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, 
"InnoDB: Error: page old data size %lu" " new data size %lu\n" @@ -1676,6 +1749,7 @@ btr_page_reorganize_low( (unsigned long) data_size1, (unsigned long) data_size2, (unsigned long) max_ins_size1, (unsigned long) max_ins_size2); + ut_ad(0); } else { success = TRUE; } @@ -1839,7 +1913,7 @@ btr_root_raise_and_insert( level = btr_page_get_level(root, mtr); - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr); + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); ut_a(!new_page_zip == !root_page_zip); @@ -2575,7 +2649,7 @@ func_start: /* 2. Allocate a new page to the index */ new_block = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr); + btr_page_get_level(page, mtr), mtr, mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); btr_page_create(new_block, new_page_zip, cursor->index, @@ -3025,15 +3099,16 @@ btr_node_ptr_delete( ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor, mtr); + btr_cur_compress_if_useful(&cursor, FALSE, mtr); } } /*************************************************************//** If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. */ +father page, thus reducing the tree height. 
+@return father block */ static -void +buf_block_t* btr_lift_page_up( /*=============*/ dict_index_t* index, /*!< in: index tree */ @@ -3150,6 +3225,8 @@ btr_lift_page_up( } ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_block, mtr)); + + return(father_block); } /*************************************************************//** @@ -3166,11 +3243,13 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; - the page must not be empty: in record delete - use btr_discard_page if the page would become - empty */ - mtr_t* mtr) /*!< in: mtr */ + btr_cur_t* cursor, /*!< in/out: cursor on the page to merge + or lift; the page must not be empty: + when deleting records, use btr_discard_page() + if the page would become empty */ + ibool adjust, /*!< in: TRUE if should adjust the + cursor position even if compression occurs */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { dict_index_t* index; ulint space; @@ -3188,13 +3267,15 @@ btr_compress( ulint* offsets; ulint data_size; ulint n_recs; + ulint nth_rec = 0; /* remove bogus warning */ ulint max_ins_size; ulint max_ins_size_reorg; block = btr_cur_get_block(cursor); page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); - ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table)); + + btr_assert_not_corrupted(block, index); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); @@ -3214,6 +3295,10 @@ btr_compress( offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, &father_cursor); + if (adjust) { + nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); + } + /* Decide the page to which we try to merge and which will inherit the locks */ @@ -3240,9 +3325,9 @@ btr_compress( } else { /* The page is the only one on the level, lift the records to the father */ - btr_lift_page_up(index, block, mtr); - mem_heap_free(heap); - return(TRUE); + + merge_block = 
btr_lift_page_up(index, block, mtr); + goto func_exit; } n_recs = page_get_n_recs(page); @@ -3324,6 +3409,10 @@ err_exit: btr_node_ptr_delete(index, block, mtr); lock_update_merge_left(merge_block, orig_pred, block); + + if (adjust) { + nth_rec += page_rec_get_n_recs_before(orig_pred); + } } else { rec_t* orig_succ; #ifdef UNIV_BTR_DEBUG @@ -3388,7 +3477,6 @@ err_exit: } btr_blob_dbg_remove(page, index, "btr_compress"); - mem_heap_free(heap); if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { /* Update the free bits of the B-tree page in the @@ -3440,6 +3528,16 @@ err_exit: btr_page_free(index, block, mtr); ut_ad(btr_check_node_ptr(index, merge_block, mtr)); +func_exit: + mem_heap_free(heap); + + if (adjust) { + btr_cur_position( + index, + page_rec_get_nth(merge_block->frame, nth_rec), + merge_block, cursor); + } + return(TRUE); } @@ -3876,7 +3974,7 @@ btr_index_rec_validate( (ulong) rec_get_n_fields_old(rec), (ulong) n); if (dump_on_error) { - buf_page_print(page, 0); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: corrupt record ", stderr); rec_print_old(stderr, rec); @@ -3914,7 +4012,8 @@ btr_index_rec_validate( (ulong) i, (ulong) len, (ulong) fixed_size); if (dump_on_error) { - buf_page_print(page, 0); + buf_page_print(page, 0, + BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: corrupt record ", stderr); rec_print_new(stderr, rec, offsets); @@ -4124,8 +4223,8 @@ loop: btr_validate_report2(index, level, block, right_block); fputs("InnoDB: broken FIL_PAGE_NEXT" " or FIL_PAGE_PREV links\n", stderr); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); ret = FALSE; } @@ -4134,8 +4233,8 @@ loop: != page_is_comp(page))) { btr_validate_report2(index, level, block, right_block); fputs("InnoDB: 'compact' flag mismatch\n", stderr); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print(page, 0, 
BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); ret = FALSE; @@ -4158,8 +4257,8 @@ loop: fputs("InnoDB: records in wrong order" " on adjacent pages\n", stderr); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: record ", stderr); rec = page_rec_get_prev(page_get_supremum_rec(page)); @@ -4208,8 +4307,8 @@ loop: fputs("InnoDB: node pointer to the page is wrong\n", stderr); - buf_page_print(father_page, 0); - buf_page_print(page, 0); + buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: node ptr ", stderr); rec_print(stderr, node_ptr, index); @@ -4241,8 +4340,10 @@ loop: btr_validate_report1(index, level, block); - buf_page_print(father_page, 0); - buf_page_print(page, 0); + buf_page_print(father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(page, 0, + BUF_PAGE_PRINT_NO_CRASH); fputs("InnoDB: Error: node ptrs differ" " on levels > 0\n" @@ -4287,9 +4388,15 @@ loop: btr_validate_report1(index, level, block); - buf_page_print(father_page, 0); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print( + father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + right_page, 0, + BUF_PAGE_PRINT_NO_CRASH); } } else { page_t* right_father_page @@ -4307,10 +4414,18 @@ loop: btr_validate_report1(index, level, block); - buf_page_print(father_page, 0); - buf_page_print(right_father_page, 0); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print( + father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + right_father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + right_page, 0, + BUF_PAGE_PRINT_NO_CRASH); } if (page_get_page_no(right_father_page) @@ -4324,10 
+4439,18 @@ loop: btr_validate_report1(index, level, block); - buf_page_print(father_page, 0); - buf_page_print(right_father_page, 0); - buf_page_print(page, 0); - buf_page_print(right_page, 0); + buf_page_print( + father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + right_father_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print( + right_page, 0, + BUF_PAGE_PRINT_NO_CRASH); } } } @@ -4372,6 +4495,12 @@ btr_validate_index( mtr_x_lock(dict_index_get_lock(index), &mtr); root = btr_root_get(index, &mtr); + + if (UNIV_UNLIKELY(srv_pass_corrupt_table && !root)) { + mtr_commit(&mtr); + return(FALSE); + } + n = btr_page_get_level(root, &mtr); for (i = 0; i <= n && !trx_is_interrupted(trx); i++) { diff --git a/btr/btr0cur.c b/btr/btr0cur.c index 43313474071..e093dabebf1 100644 --- a/btr/btr0cur.c +++ b/btr/btr0cur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -438,7 +438,12 @@ btr_cur_search_to_nth_level( ut_ad(dict_index_check_search_tuple(index, tuple)); ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->page != FIL_NULL); + UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match); + UNIV_MEM_INVALID(&cursor->up_bytes, sizeof cursor->up_bytes); + UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match); + UNIV_MEM_INVALID(&cursor->low_bytes, sizeof cursor->low_bytes); #ifdef UNIV_DEBUG cursor->up_match = ULINT_UNDEFINED; cursor->low_match = ULINT_UNDEFINED; @@ -810,11 +815,11 @@ retry_page_get: if (level != 0) { /* x-latch the page */ - page = btr_page_get( + buf_block_t* child_block = btr_block_get( space, zip_size, page_no, RW_X_LATCH, index, mtr); - ut_a((ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); + page = buf_block_get_frame(child_block); + btr_assert_not_corrupted(child_block, index); } else { cursor->low_match = low_match; cursor->low_bytes = low_bytes; @@ -1959,7 +1964,7 @@ btr_cur_update_in_place( page_zip = buf_block_get_page_zip(block); /* Check that enough space is available on the compressed page. 
*/ - if (UNIV_LIKELY_NULL(page_zip) + if (page_zip && !btr_cur_update_alloc_zip(page_zip, block, index, rec_offs_size(offsets), FALSE, mtr)) { return(DB_ZIP_OVERFLOW); @@ -2158,7 +2163,7 @@ any_extern: ut_a(!page_zip || page_zip_validate(page_zip, page)); #endif /* UNIV_ZIP_DEBUG */ - if (UNIV_LIKELY_NULL(page_zip) + if (page_zip && !btr_cur_update_alloc_zip(page_zip, block, index, new_rec_size, TRUE, mtr)) { err = DB_ZIP_OVERFLOW; @@ -2321,7 +2326,9 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update */ + btr_cur_t* cursor, /*!< in/out: cursor on the record to update; + cursor may become invalid if *big_rec == NULL + || !(flags & BTR_KEEP_POS_FLAG) */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -2463,10 +2470,10 @@ btr_cur_pessimistic_update( record to be inserted: we have to remember which fields were such */ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap); + ut_ad(rec_offs_validate(rec, index, offsets)); n_ext += btr_push_update_extern_fields(new_entry, update, *heap); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { ut_ad(page_is_comp(page)); if (page_zip_rec_needs_ext( rec_get_converted_size(index, new_entry, n_ext), @@ -2486,6 +2493,10 @@ make_external: err = DB_TOO_BIG_RECORD; goto return_after_reservations; } + + ut_ad(page_is_leaf(page)); + ut_ad(dict_index_is_clust(index)); + ut_ad(flags & BTR_KEEP_POS_FLAG); } if (trx->fake_changes) { @@ -2519,6 +2530,8 @@ make_external: rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr); if (rec) { + page_cursor->rec = rec; + lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), rec, block); @@ -2532,7 +2545,10 @@ make_external: rec, index, 
offsets, mtr); } - btr_cur_compress_if_useful(cursor, mtr); + btr_cur_compress_if_useful( + cursor, + big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), + mtr); if (page_zip && !dict_index_is_clust(index) && page_is_leaf(page)) { @@ -2552,6 +2568,21 @@ make_external: } } + if (big_rec_vec) { + ut_ad(page_is_leaf(page)); + ut_ad(dict_index_is_clust(index)); + ut_ad(flags & BTR_KEEP_POS_FLAG); + + /* btr_page_split_and_insert() in + btr_cur_pessimistic_insert() invokes + mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). + We must keep the index->lock when we created a + big_rec, so that row_upd_clust_rec() can store the + big_rec in the same mini-transaction. */ + + mtr_x_lock(dict_index_get_lock(index), mtr); + } + /* Was the record to be updated positioned as the first user record on its page? */ was_first = page_cur_is_before_first(page_cursor); @@ -2567,6 +2598,7 @@ make_external: ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); + page_cursor->rec = rec; if (dict_index_is_sec_or_ibuf(index)) { /* Update PAGE_MAX_TRX_ID in the index page header. 
@@ -3011,10 +3043,12 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - mtr_t* mtr) /*!< in: mtr */ + btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; + cursor does not stay valid if !adjust and + compression occurs */ + ibool adjust, /*!< in: TRUE if should adjust the + cursor position even if compression occurs */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -3023,7 +3057,7 @@ btr_cur_compress_if_useful( MTR_MEMO_PAGE_X_FIX)); return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, mtr)); + && btr_compress(cursor, adjust, mtr)); } /*******************************************************//** @@ -3270,7 +3304,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, mtr); + ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); } if (n_extents > 0) { @@ -4011,10 +4045,10 @@ btr_cur_set_ownership_of_extern_field( byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; } - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val); page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr); - } else if (UNIV_LIKELY(mtr != NULL)) { + } else if (mtr != NULL) { mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr); @@ -4253,9 +4287,9 @@ The fields are stored on pages allocated from leaf node file segment of the index tree. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN -ulint -btr_store_big_rec_extern_fields_func( -/*=================================*/ +enum db_err +btr_store_big_rec_extern_fields( +/*============================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -4264,38 +4298,37 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ -#ifdef UNIV_DEBUG - mtr_t* local_mtr, /*!< in: mtr containing the - latch to rec and to the tree */ -#endif /* UNIV_DEBUG */ -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ibool update_in_place,/*! in: TRUE if the record is updated - in place (not delete+insert) */ -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - const big_rec_t*big_rec_vec) /*!< in: vector containing fields + const big_rec_t*big_rec_vec, /*!< in: vector containing fields to be stored externally */ - + mtr_t* btr_mtr, /*!< in: mtr containing the + latches to the clustered index */ + enum blob_op op) /*! 
in: operation code */ { - ulint rec_page_no; - byte* field_ref; - ulint extern_len; - ulint store_len; - ulint page_no; - ulint space_id; - ulint zip_size; - ulint prev_page_no; - ulint hint_page_no; - ulint i; - mtr_t mtr; - mem_heap_t* heap = NULL; + ulint rec_page_no; + byte* field_ref; + ulint extern_len; + ulint store_len; + ulint page_no; + ulint space_id; + ulint zip_size; + ulint prev_page_no; + ulint hint_page_no; + ulint i; + mtr_t mtr; + mtr_t* alloc_mtr; + mem_heap_t* heap = NULL; page_zip_des_t* page_zip; - z_stream c_stream; + z_stream c_stream; + buf_block_t** freed_pages = NULL; + ulint n_freed_pages = 0; + enum db_err error = DB_SUCCESS; ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), + ut_ad(btr_mtr); + ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); ut_ad(buf_block_get_frame(rec_block) == page_align(rec)); ut_a(dict_index_is_clust(index)); @@ -4308,7 +4341,7 @@ btr_store_big_rec_extern_fields_func( rec_page_no = buf_block_get_page_no(rec_block); ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { int err; /* Zlib deflate needs 128 kilobytes for the default @@ -4324,6 +4357,42 @@ btr_store_big_rec_extern_fields_func( ut_a(err == Z_OK); } + if (btr_blob_op_is_update(op)) { + /* Avoid reusing pages that have been previously freed + in btr_mtr. 
*/ + if (btr_mtr->n_freed_pages) { + if (heap == NULL) { + heap = mem_heap_create( + btr_mtr->n_freed_pages + * sizeof *freed_pages); + } + + freed_pages = mem_heap_alloc( + heap, + btr_mtr->n_freed_pages + * sizeof *freed_pages); + n_freed_pages = 0; + } + + /* Because btr_mtr will be committed after mtr, it is + possible that the tablespace has been extended when + the B-tree record was updated or inserted, or it will + be extended while allocating pages for big_rec. + + TODO: In mtr (not btr_mtr), write a redo log record + about extending the tablespace to its current size, + and remember the current size. Whenever the tablespace + grows as pages are allocated, write further redo log + records to mtr. (Currently tablespace extension is not + covered by the redo log. If it were, the record would + only be written to btr_mtr, which is committed after + mtr.) */ + alloc_mtr = btr_mtr; + } else { + /* Use the local mtr for allocations. */ + alloc_mtr = &mtr; + } + #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /* All pointers to externally stored columns in the record must either be zero or they must be pointers to inherited @@ -4338,7 +4407,7 @@ btr_store_big_rec_extern_fields_func( /* Either this must be an update in place, or the BLOB must be inherited, or the BLOB pointer must be zero (will be written in this function). 
*/ - ut_a(update_in_place + ut_a(op == BTR_STORE_UPDATE || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) || !memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); @@ -4363,7 +4432,7 @@ btr_store_big_rec_extern_fields_func( prev_page_no = FIL_NULL; - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { int err = deflateReset(&c_stream); ut_a(err == Z_OK); @@ -4383,18 +4452,24 @@ btr_store_big_rec_extern_fields_func( hint_page_no = prev_page_no + 1; } +alloc_another: block = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, &mtr); + FSP_NO_DIR, 0, alloc_mtr, &mtr); if (UNIV_UNLIKELY(block == NULL)) { - mtr_commit(&mtr); + error = DB_OUT_OF_FILE_SPACE; + goto func_exit; + } - if (UNIV_LIKELY_NULL(page_zip)) { - deflateEnd(&c_stream); - mem_heap_free(heap); - } - - return(DB_OUT_OF_FILE_SPACE); + if (rw_lock_get_x_lock_count(&block->lock) > 1) { + /* This page must have been freed in + btr_mtr previously. Put it aside, and + allocate another page for the BLOB data. */ + ut_ad(alloc_mtr == btr_mtr); + ut_ad(btr_blob_op_is_update(op)); + ut_ad(n_freed_pages < btr_mtr->n_freed_pages); + freed_pages[n_freed_pages++] = block; + goto alloc_another; } page_no = buf_block_get_page_no(block); @@ -4411,7 +4486,7 @@ btr_store_big_rec_extern_fields_func( SYNC_EXTERN_STORAGE); prev_page = buf_block_get_frame(prev_block); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mlog_write_ulint( prev_page + FIL_PAGE_NEXT, page_no, MLOG_4BYTES, &mtr); @@ -4428,7 +4503,7 @@ btr_store_big_rec_extern_fields_func( } - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { int err; page_zip_des_t* blob_page_zip; @@ -4511,11 +4586,15 @@ btr_store_big_rec_extern_fields_func( goto next_zip_page; } - rec_block = buf_page_get(space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(rec_block, - SYNC_NO_ORDER_CHECK); + if (alloc_mtr == &mtr) { + rec_block = buf_page_get( + space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level( 
+ rec_block, + SYNC_NO_ORDER_CHECK); + } if (err == Z_STREAM_END) { mach_write_to_4(field_ref @@ -4549,7 +4628,8 @@ btr_store_big_rec_extern_fields_func( page_zip_write_blob_ptr( page_zip, rec, index, offsets, - big_rec_vec->fields[i].field_no, &mtr); + big_rec_vec->fields[i].field_no, + alloc_mtr); next_zip_page: prev_page_no = page_no; @@ -4594,19 +4674,23 @@ next_zip_page: extern_len -= store_len; - rec_block = buf_page_get(space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(rec_block, - SYNC_NO_ORDER_CHECK); + if (alloc_mtr == &mtr) { + rec_block = buf_page_get( + space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level( + rec_block, + SYNC_NO_ORDER_CHECK); + } mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); if (prev_page_no == FIL_NULL) { btr_blob_dbg_add_blob( @@ -4616,18 +4700,19 @@ next_zip_page: mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, - space_id, - MLOG_4BYTES, &mtr); + space_id, MLOG_4BYTES, + alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, - page_no, - MLOG_4BYTES, &mtr); + page_no, MLOG_4BYTES, + alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, + alloc_mtr); } prev_page_no = page_no; @@ -4641,8 +4726,23 @@ next_zip_page: } } - if (UNIV_LIKELY_NULL(page_zip)) { +func_exit: + if (page_zip) { deflateEnd(&c_stream); + } + + if (n_freed_pages) { + ulint i; + + ut_ad(alloc_mtr == btr_mtr); + ut_ad(btr_blob_op_is_update(op)); + + for (i = 0; i < n_freed_pages; i++) { + btr_page_free_low(index, freed_pages[i], 0, alloc_mtr); + } + } + + if (heap != NULL) { mem_heap_free(heap); } @@ -4663,7 +4763,7 @@ next_zip_page: ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); } #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - 
return(DB_SUCCESS); + return(error); } /*******************************************************************//** @@ -4866,7 +4966,7 @@ btr_free_externally_stored_field( btr_page_free_low(index, ext_block, 0, &mtr); - if (UNIV_LIKELY(page_zip != NULL)) { + if (page_zip) { mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO, next_page_no); mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4, diff --git a/btr/btr0pcur.c b/btr/btr0pcur.c index e6a9ddcf43e..0de7b63f92d 100644 --- a/btr/btr0pcur.c +++ b/btr/btr0pcur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -52,12 +52,13 @@ btr_pcur_create_for_mysql(void) } /**************************************************************//** -Frees the memory for a persistent cursor object. */ +Resets a persistent cursor object, freeing ::old_rec_buf if it is +allocated and resetting the other members to their initial values. */ UNIV_INTERN void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor) /*!< in, own: persistent cursor */ +btr_pcur_reset( +/*===========*/ + btr_pcur_t* cursor) /*!< in, out: persistent cursor */ { if (cursor->old_rec_buf != NULL) { @@ -66,6 +67,7 @@ btr_pcur_free_for_mysql( cursor->old_rec_buf = NULL; } + cursor->btr_cur.index = NULL; cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; cursor->old_n_fields = 0; @@ -73,7 +75,17 @@ btr_pcur_free_for_mysql( cursor->latch_mode = BTR_NO_LATCHES; cursor->pos_state = BTR_PCUR_NOT_POSITIONED; +} +/**************************************************************//** +Frees the memory for a persistent cursor object. 
*/ +UNIV_INTERN +void +btr_pcur_free_for_mysql( +/*====================*/ + btr_pcur_t* cursor) /*!< in, own: persistent cursor */ +{ + btr_pcur_reset(cursor); mem_free(cursor); } @@ -127,6 +139,8 @@ btr_pcur_store_position( ut_a(btr_page_get_next(page, mtr) == FIL_NULL); ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); + ut_ad(page_is_leaf(page)); + ut_ad(page_get_page_no(page) == index->page); cursor->old_stored = BTR_PCUR_OLD_STORED; @@ -319,13 +333,19 @@ btr_pcur_restore_position_func( /* Save the old search mode of the cursor */ old_mode = cursor->search_mode; - if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) { + switch (cursor->rel_pos) { + case BTR_PCUR_ON: mode = PAGE_CUR_LE; - } else if (cursor->rel_pos == BTR_PCUR_AFTER) { + break; + case BTR_PCUR_AFTER: mode = PAGE_CUR_G; - } else { - ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE); + break; + case BTR_PCUR_BEFORE: mode = PAGE_CUR_L; + break; + default: + ut_error; + mode = 0; } btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode, @@ -334,25 +354,44 @@ btr_pcur_restore_position_func( /* Restore the old search mode */ cursor->search_mode = old_mode; - if (cursor->rel_pos == BTR_PCUR_ON - && btr_pcur_is_on_user_rec(cursor) - && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), - rec_get_offsets( - btr_pcur_get_rec(cursor), index, - NULL, ULINT_UNDEFINED, &heap))) { + if (btr_pcur_is_on_user_rec(cursor)) { + switch (cursor->rel_pos) { + case BTR_PCUR_ON: + if (!cmp_dtuple_rec( + tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + index, NULL, + ULINT_UNDEFINED, &heap))) { - /* We have to store the NEW value for the modify clock, since - the cursor can now be on a different page! But we can retain - the value of old_rec */ + /* We have to store the NEW value for + the modify clock, since the cursor can + now be on a different page! 
But we can + retain the value of old_rec */ - cursor->block_when_stored = btr_pcur_get_block(cursor); - cursor->modify_clock = buf_block_get_modify_clock( - cursor->block_when_stored); - cursor->old_stored = BTR_PCUR_OLD_STORED; + cursor->block_when_stored = + btr_pcur_get_block(cursor); + cursor->modify_clock = + buf_block_get_modify_clock( + cursor->block_when_stored); + cursor->old_stored = BTR_PCUR_OLD_STORED; - mem_heap_free(heap); + mem_heap_free(heap); - return(TRUE); + return(TRUE); + } + + break; + case BTR_PCUR_BEFORE: + page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); + break; + case BTR_PCUR_AFTER: + page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); + break; +#ifdef UNIV_DEBUG + default: + ut_error; +#endif /* UNIV_DEBUG */ + } } mem_heap_free(heap); diff --git a/btr/btr0sea.c b/btr/btr0sea.c index bddbcc79dd6..855ab62c42f 100644 --- a/btr/btr0sea.c +++ b/btr/btr0sea.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -1240,7 +1240,7 @@ cleanup: index->name, (ulong) block->n_pointers); rw_lock_x_unlock(btr_search_get_latch(index->id)); - btr_search_validate(); + ut_ad(btr_search_validate()); } else { rw_lock_x_unlock(btr_search_get_latch(index->id)); } @@ -2143,7 +2143,9 @@ btr_search_validate(void) (ulong) block->curr_left_side); if (n_page_dumps < 20) { - buf_page_print(page, 0); + buf_page_print( + page, 0, + BUF_PAGE_PRINT_NO_CRASH); n_page_dumps++; } } diff --git a/buf/buf0buf.c b/buf/buf0buf.c index b3f6600dca6..fd7b8959473 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. 
All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -57,7 +57,9 @@ Created 11/5/1995 Heikki Tuuri /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); -inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) +static inline +void +_increment_page_get_statistics(buf_block_t* block, trx_t* trx) { ulint block_hash; ulint block_hash_byte; @@ -680,8 +682,12 @@ void buf_page_print( /*===========*/ const byte* read_buf, /*!< in: a database page */ - ulint zip_size) /*!< in: compressed page size, or - 0 for uncompressed pages */ + ulint zip_size, /*!< in: compressed page size, or + 0 for uncompressed pages */ + ulint flags) /*!< in: 0 or + BUF_PAGE_PRINT_NO_CRASH or + BUF_PAGE_PRINT_NO_FULL */ + { #ifndef UNIV_HOTBACKUP dict_index_t* index; @@ -695,11 +701,14 @@ buf_page_print( size = UNIV_PAGE_SIZE; } - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n", - (ulong) size); - ut_print_buf(stderr, read_buf, size); - fputs("\nInnoDB: End of page dump\n", stderr); + if (!(flags & BUF_PAGE_PRINT_NO_FULL)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Page dump in ascii and hex (%lu bytes):\n", + (ulong) size); + ut_print_buf(stderr, read_buf, size); + fputs("\nInnoDB: End of page dump\n", stderr); + } if (zip_size) { /* Print compressed page. 
*/ @@ -873,6 +882,8 @@ buf_page_print( stderr); break; } + + ut_ad(flags & BUF_PAGE_PRINT_NO_CRASH); } #ifndef UNIV_HOTBACKUP @@ -1078,11 +1089,8 @@ buf_chunk_init( for (i = chunk->size; i--; ) { buf_block_init(buf_pool, block, frame); + UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); -#ifdef HAVE_purify - /* Wipe contents of frame to eliminate a Purify warning */ - memset(block->frame, '\0', UNIV_PAGE_SIZE); -#endif /* Add the block to the free list */ mutex_enter(&buf_pool->free_list_mutex); UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page)); @@ -2768,7 +2776,7 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock_func(&block->lock, 0, file, line); + rw_lock_x_lock_inline(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); @@ -2931,14 +2939,14 @@ wait_until_unfixed: break; case RW_S_LATCH: - rw_lock_s_lock_func(&(block->lock), 0, file, line); + rw_lock_s_lock_inline(&(block->lock), 0, file, line); fix_type = MTR_MEMO_PAGE_S_FIX; break; default: ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_func(&(block->lock), 0, file, line); + rw_lock_x_lock_inline(&(block->lock), 0, file, line); fix_type = MTR_MEMO_PAGE_X_FIX; break; @@ -3023,8 +3031,8 @@ buf_page_optimistic_get( file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_x_lock_func_nowait_inline(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -3165,8 +3173,8 @@ buf_page_get_known_nowait( file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_x_lock_func_nowait_inline(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -3186,7 +3194,7 @@ buf_page_get_known_nowait( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - 
ut_a(block->page.file_page_was_freed == FALSE); + ut_a(mode == BUF_KEEP_OLD || !block->page.file_page_was_freed); #endif #ifdef UNIV_IBUF_COUNT_DEBUG @@ -3261,8 +3269,8 @@ buf_page_try_get_func( S-latch. */ fix_type = MTR_MEMO_PAGE_X_FIX; - success = rw_lock_x_lock_func_nowait(&block->lock, - file, line); + success = rw_lock_x_lock_func_nowait_inline(&block->lock, + file, line); } if (!success) { @@ -3997,7 +4005,8 @@ corrupt: "InnoDB: You may have to recover" " from a backup.\n", (ulong) bpage->offset); - buf_page_print(frame, buf_page_get_zip_size(bpage)); + buf_page_print(frame, buf_page_get_zip_size(bpage), + BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, "InnoDB: Database page corruption on disk" " or a failed\n" @@ -5285,34 +5294,32 @@ buf_all_freed(void) /*********************************************************************//** Checks that there currently are no pending i/o-operations for the buffer pool. -@return TRUE if there is no pending i/o */ +@return number of pending i/o */ UNIV_INTERN -ibool -buf_pool_check_no_pending_io(void) -/*==============================*/ +ulint +buf_pool_check_num_pending_io(void) +/*===============================*/ { ulint i; - ibool ret = TRUE; + ulint pending_io = 0; buf_pool_mutex_enter_all(); - for (i = 0; i < srv_buf_pool_instances && ret; i++) { + for (i = 0; i < srv_buf_pool_instances; i++) { const buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); - if (buf_pool->n_pend_reads - + buf_pool->n_flush[BUF_FLUSH_LRU] - + buf_pool->n_flush[BUF_FLUSH_LIST] - + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) { + pending_io += buf_pool->n_pend_reads + + buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]; - ret = FALSE; - } } buf_pool_mutex_exit_all(); - return(ret); + return(pending_io); } #if 0 diff --git a/buf/buf0flu.c b/buf/buf0flu.c index d776a274d14..bb921928653 100644 --- a/buf/buf0flu.c +++ b/buf/buf0flu.c @@ -767,7 +767,8 @@ buf_flush_buffered_writes(void) 
if (UNIV_UNLIKELY (!page_simple_validate_new(block->frame))) { corrupted_page: - buf_page_print(block->frame, 0); + buf_page_print(block->frame, 0, + BUF_PAGE_PRINT_NO_CRASH); ut_print_timestamp(stderr); fprintf(stderr, @@ -2036,22 +2037,6 @@ buf_flush_list( buf_pool = buf_pool_from_array(i); - if (lsn_limit != IB_ULONGLONG_MAX) { - buf_page_t* bpage; - - buf_flush_list_mutex_enter(buf_pool); - bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - if (!bpage - || bpage->oldest_modification >= lsn_limit) { - - buf_flush_list_mutex_exit(buf_pool); - continue; - } else { - - buf_flush_list_mutex_exit(buf_pool); - } - } - if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { /* We have two choices here. If lsn_limit was specified then skipping an instance of buffer diff --git a/buf/buf0lru.c b/buf/buf0lru.c index daa2cd93255..132b12899b4 100644 --- a/buf/buf0lru.c +++ b/buf/buf0lru.c @@ -48,6 +48,7 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "log0recv.h" #include "srv0srv.h" +#include "srv0start.h" /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV @@ -371,42 +372,278 @@ next_page: ut_free(page_arr); } +/******************************************************************//** +While flushing (or removing dirty) pages from a tablespace we don't +want to hog the CPU and resources. Release the buffer pool and block +mutex and try to force a context switch. Then reacquire the same mutexes. +The current page is "fixed" before the release of the mutexes and then +"unfixed" again once we have reacquired the mutexes. 
*/ +static +void +buf_flush_yield( +/*============*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + buf_page_t* bpage) /*!< in/out: current page */ +{ + mutex_t* block_mutex; + + ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(buf_page_in_file(bpage)); + + block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + /* "Fix" the block so that the position cannot be + changed after we release the buffer pool and + block mutexes. */ + buf_page_set_sticky(bpage); + + /* Now it is safe to release the buf_pool->mutex. */ + buf_pool_mutex_exit(buf_pool); + + mutex_exit(block_mutex); + /* Try and force a context switch. */ + os_thread_yield(); + + buf_pool_mutex_enter(buf_pool); + + mutex_enter(block_mutex); + /* "Unfix" the block now that we have both the + buffer pool and block mutex again. */ + buf_page_unset_sticky(bpage); + mutex_exit(block_mutex); +} + +/******************************************************************//** +If we have hogged the resources for too long then release the buffer +pool and flush list mutex and do a thread yield. Set the current page +to "sticky" so that it is not relocated during the yield. +@return TRUE if yielded */ +static +ibool +buf_flush_try_yield( +/*================*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + buf_page_t* bpage, /*!< in/out: bpage to remove */ + ulint processed) /*!< in: number of pages processed */ +{ + /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the + loop we release buf_pool->mutex to let other threads + do their job but only if the block is not IO fixed. This + ensures that the block stays in its position in the + flush_list. */ + + if (bpage != NULL + && processed >= BUF_LRU_DROP_SEARCH_SIZE + && buf_page_get_io_fix(bpage) == BUF_IO_NONE) { + + buf_flush_list_mutex_exit(buf_pool); + + /* Release the buffer pool and block mutex + to give the other threads a go. 
*/ + + buf_flush_yield(buf_pool, bpage); + + buf_flush_list_mutex_enter(buf_pool); + + /* Should not have been removed from the flush + list during the yield. However, this check is + not sufficient to catch a remove -> add. */ + + ut_ad(bpage->in_flush_list); + + return(TRUE); + } + + return(FALSE); +} + +/******************************************************************//** +Removes a single page from a given tablespace inside a specific +buffer pool instance. +@return TRUE if page was removed. */ +static +ibool +buf_flush_or_remove_page( +/*=====================*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + buf_page_t* bpage) /*!< in/out: bpage to remove */ +{ + mutex_t* block_mutex; + ibool processed = FALSE; + + ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(buf_flush_list_mutex_own(buf_pool)); + + block_mutex = buf_page_get_mutex(bpage); + + /* bpage->space and bpage->io_fix are protected by + buf_pool->mutex and block_mutex. It is safe to check + them while holding buf_pool->mutex only. */ + + if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + + /* We cannot remove this page during this scan + yet; maybe the system is currently reading it + in, or flushing the modifications to the file */ + + } else { + + /* We have to release the flush_list_mutex to obey the + latching order. We are however guaranteed that the page + will stay in the flush_list because buf_flush_remove() + needs buf_pool->mutex as well (for the non-flush case). 
*/ + + buf_flush_list_mutex_exit(buf_pool); + + mutex_enter(block_mutex); + + ut_ad(bpage->oldest_modification != 0); + + if (bpage->buf_fix_count == 0) { + + buf_flush_remove(bpage); + + processed = TRUE; + } + + mutex_exit(block_mutex); + + buf_flush_list_mutex_enter(buf_pool); + } + + ut_ad(!mutex_own(block_mutex)); + + return(processed); +} + /******************************************************************//** Remove all dirty pages belonging to a given tablespace inside a specific buffer pool instance when we are deleting the data file(s) of that tablespace. The pages still remain a part of LRU and are evicted from -the list as they age towards the tail of the LRU. */ +the list as they age towards the tail of the LRU. +@return TRUE if all freed. */ +static +ibool +buf_flush_or_remove_pages( +/*======================*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + ulint id) /*!< in: target space id for which + to remove or flush pages */ +{ + buf_page_t* prev; + buf_page_t* bpage; + ulint processed = 0; + ibool all_freed = TRUE; + + buf_flush_list_mutex_enter(buf_pool); + + for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); + bpage != NULL; + bpage = prev) { + + ut_a(buf_page_in_file(bpage)); + ut_ad(bpage->in_flush_list); + + /* Save the previous link because once we free the + page we can't rely on the links. */ + + prev = UT_LIST_GET_PREV(flush_list, bpage); + + if (buf_page_get_space(bpage) != id) { + + /* Skip this block, as it does not belong to + the target space. */ + + } else if (!buf_flush_or_remove_page(buf_pool, bpage)) { + + /* Remove was unsuccessful, we have to try again + by scanning the entire list from the end. */ + + all_freed = FALSE; + } + + ++processed; + + /* Yield if we have hogged the CPU and mutexes for too long. */ + if (buf_flush_try_yield(buf_pool, prev, processed)) { + + /* Reset the batch size counter if we had to yield. 
*/ + + processed = 0; + } + + } + + buf_flush_list_mutex_exit(buf_pool); + + return(all_freed); +} + +/******************************************************************//** +Remove or flush all the dirty pages that belong to a given tablespace +inside a specific buffer pool instance. The pages will remain in the LRU +list and will be evicted from the LRU list as they age and move towards +the tail of the LRU list. */ static void -buf_LRU_remove_dirty_pages_for_tablespace( -/*======================================*/ +buf_flush_dirty_pages( +/*==================*/ + buf_pool_t* buf_pool, /*!< buffer pool instance */ + ulint id) /*!< in: space id */ +{ + ibool all_freed; + + do { + buf_pool_mutex_enter(buf_pool); + + all_freed = buf_flush_or_remove_pages(buf_pool, id); + + buf_pool_mutex_exit(buf_pool); + + ut_ad(buf_flush_validate(buf_pool)); + + if (!all_freed) { + os_thread_sleep(20000); + } + + } while (!all_freed); +} + +/******************************************************************//** +Remove all pages that belong to a given tablespace inside a specific +buffer pool instance when we are DISCARDing the tablespace. */ +static +void +buf_LRU_remove_all_pages( +/*=====================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id) /*!< in: space id */ { buf_page_t* bpage; ibool all_freed; - ulint i; scan_again: //buf_pool_mutex_enter(buf_pool); mutex_enter(&buf_pool->LRU_list_mutex); rw_lock_x_lock(&buf_pool->page_hash_latch); - buf_flush_list_mutex_enter(buf_pool); all_freed = TRUE; - for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list), i = 0; - bpage != NULL; ++i) { + for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; + /* No op */) { buf_page_t* prev_bpage; mutex_t* block_mutex = NULL; ut_a(buf_page_in_file(bpage)); + ut_ad(bpage->in_LRU_list); - prev_bpage = UT_LIST_GET_PREV(flush_list, bpage); + prev_bpage = UT_LIST_GET_PREV(LRU, bpage); /* bpage->space and bpage->io_fix are protected by - buf_pool->mutex and block_mutex. 
It is safe to check + buf_pool->mutex and the block_mutex. It is safe to check them while holding buf_pool->mutex only. */ if (buf_page_get_space(bpage) != id) { @@ -420,97 +657,89 @@ scan_again: all_freed = FALSE; goto next_page; + } else { + + block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); + + if (bpage->buf_fix_count > 0) { + + mutex_exit(block_mutex); + + /* We cannot remove this page during + this scan yet; maybe the system is + currently reading it in, or flushing + the modifications to the file */ + + all_freed = FALSE; + + goto next_page; + } } - /* We have to release the flush_list_mutex to obey the - latching order. We are however guaranteed that the page - will stay in the flush_list because buf_flush_remove() - needs buf_pool->mutex as well. */ - buf_flush_list_mutex_exit(buf_pool); - block_mutex = buf_page_get_mutex_enter(bpage); + ut_ad(mutex_own(block_mutex)); - if (!block_mutex) { - /* It may be impossible case... - Something wrong, so will be scan_again */ - all_freed = FALSE; - goto next_page; +#ifdef UNIV_DEBUG + if (buf_debug_prints) { + fprintf(stderr, + "Dropping space %lu page %lu\n", + (ulong) buf_page_get_space(bpage), + (ulong) buf_page_get_page_no(bpage)); } +#endif + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { + /* Do nothing, because the adaptive hash index + covers uncompressed pages only. */ + } else if (((buf_block_t*) bpage)->index) { + ulint page_no; + ulint zip_size; + + buf_pool_mutex_exit(buf_pool); + + zip_size = buf_page_get_zip_size(bpage); + page_no = buf_page_get_page_no(bpage); - if (bpage->buf_fix_count > 0) { mutex_exit(block_mutex); - buf_flush_list_mutex_enter(buf_pool); - /* We cannot remove this page during - this scan yet; maybe the system is - currently reading it in, or flushing - the modifications to the file */ + /* Note that the following call will acquire + and release block->lock X-latch. 
*/ - all_freed = FALSE; - goto next_page; + btr_search_drop_page_hash_when_freed( + id, zip_size, page_no); + + goto scan_again; } - ut_ad(bpage->oldest_modification != 0); + if (bpage->oldest_modification != 0) { + buf_flush_remove(bpage); + } - buf_flush_remove(bpage); + ut_ad(!bpage->in_flush_list); + + /* Remove from the LRU list. */ + + if (buf_LRU_block_remove_hashed_page(bpage, TRUE) + != BUF_BLOCK_ZIP_FREE) { + + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, TRUE); + mutex_exit(block_mutex); + + } else { + /* The block_mutex should have been released + by buf_LRU_block_remove_hashed_page() when it + returns BUF_BLOCK_ZIP_FREE. */ + ut_ad(block_mutex == &buf_pool->zip_mutex); + } + + ut_ad(!mutex_own(block_mutex)); - mutex_exit(block_mutex); - buf_flush_list_mutex_enter(buf_pool); next_page: bpage = prev_bpage; - - if (!bpage) { - break; - } - - /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the - loop we release buf_pool->mutex to let other threads - do their job. */ - if (i < BUF_LRU_DROP_SEARCH_SIZE) { - continue; - } - - /* We IO-fix the block to make sure that the block - stays in its position in the flush_list. */ - if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { - /* Block is already IO-fixed. We don't - want to change the value. Lets leave - this block alone. */ - continue; - } - - buf_flush_list_mutex_exit(buf_pool); - block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); - buf_page_set_sticky(bpage); - mutex_exit(block_mutex); - - /* Now it is safe to release the buf_pool->mutex. 
*/ - //buf_pool_mutex_exit(buf_pool); - mutex_exit(&buf_pool->LRU_list_mutex); - rw_lock_x_unlock(&buf_pool->page_hash_latch); - - os_thread_yield(); - //buf_pool_mutex_enter(buf_pool); - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(&buf_pool->page_hash_latch); - - - mutex_enter(block_mutex); - buf_page_unset_sticky(bpage); - mutex_exit(block_mutex); - - buf_flush_list_mutex_enter(buf_pool); - ut_ad(bpage->in_flush_list); - - i = 0; } // buf_pool_mutex_exit(buf_pool); mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(&buf_pool->page_hash_latch); - buf_flush_list_mutex_exit(buf_pool); - - ut_ad(buf_flush_validate(buf_pool)); if (!all_freed) { os_thread_sleep(20000); @@ -520,28 +749,46 @@ next_page: } /******************************************************************//** -Invalidates all pages belonging to a given tablespace when we are deleting -the data file(s) of that tablespace. */ +Removes all pages belonging to a given tablespace. */ UNIV_INTERN void -buf_LRU_invalidate_tablespace( +buf_LRU_flush_or_remove_pages( /*==========================*/ - ulint id) /*!< in: space id */ + ulint id, /*!< in: space id */ + enum buf_remove_t buf_remove)/*!< in: remove or flush + strategy */ { - ulint i; + ulint i; - /* Before we attempt to drop pages one by one we first - attempt to drop page hash index entries in batches to make - it more efficient. The batching attempt is a best effort - attempt and does not guarantee that all pages hash entries - will be dropped. We get rid of remaining page hash entries - one by one below. */ for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); - buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); - buf_LRU_remove_dirty_pages_for_tablespace(buf_pool, id); + + switch (buf_remove) { + case BUF_REMOVE_ALL_NO_WRITE: + /* A DISCARD tablespace case. Remove AHI entries + and evict all pages from LRU. 
*/ + + /* Before we attempt to drop pages hash entries + one by one we first attempt to drop page hash + index entries in batches to make it more + efficient. The batching attempt is a best effort + attempt and does not guarantee that all pages + hash entries will be dropped. We get rid of + remaining page hash entries one by one below. */ + buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); + buf_LRU_remove_all_pages(buf_pool, id); + break; + + case BUF_REMOVE_FLUSH_NO_WRITE: + /* A DROP table case. AHI entries are already + removed. No need to evict all pages from LRU + list. Just evict pages from flush list without + writing. */ + buf_flush_dirty_pages(buf_pool, id); + break; + } } } @@ -2260,6 +2507,7 @@ func_exit: /********************************************************************//** Dump the LRU page list to the specific file. */ #define LRU_DUMP_FILE "ib_lru_dump" +#define LRU_DUMP_TEMP_FILE "ib_lru_dump.tmp" UNIV_INTERN ibool @@ -2294,8 +2542,10 @@ buf_LRU_file_dump(void) goto end; } - dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE, - OS_FILE_NORMAL, OS_DATA_FILE, &success); + dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_TEMP_FILE, + OS_FILE_OVERWRITE, OS_FILE_NORMAL, OS_DATA_FILE, + &success); + if (!success) { os_file_get_last_error(TRUE); fprintf(stderr, @@ -2324,6 +2574,13 @@ buf_LRU_file_dump(void) offset++; if (offset == UNIV_PAGE_SIZE/4) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + success = 0; + fprintf(stderr, + " InnoDB: stopped dumping lru" + " pages because of server" + " shutdown.\n"); + } success = os_file_write(LRU_DUMP_FILE, dump_file, buffer, (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)), @@ -2363,8 +2620,16 @@ buf_LRU_file_dump(void) ret = TRUE; end: - if (dump_file != -1) + if (dump_file != -1) { + if (success) { + success = os_file_flush(dump_file, TRUE); + } os_file_close(dump_file); + } + if (success) { + success = 
os_file_rename(innodb_file_temp_key, + LRU_DUMP_TEMP_FILE, LRU_DUMP_FILE); + } if (buffer_base) ut_free(buffer_base); @@ -2410,6 +2675,7 @@ buf_LRU_file_restore(void) dump_record_t* records = NULL; ulint size; ulint size_high; + ulint recsize = sizeof(dump_record_t); ulint length; dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key, @@ -2417,7 +2683,14 @@ buf_LRU_file_restore(void) if (!success || !os_file_get_size(dump_file, &size, &size_high)) { os_file_get_last_error(TRUE); fprintf(stderr, - " InnoDB: cannot open %s\n", LRU_DUMP_FILE); + " InnoDB: cannot open %s, " + " buffer pool preload not done.\n", LRU_DUMP_FILE); + goto end; + } + + if (size == 0 || size_high > 0 || size % recsize) { + fprintf(stderr, " InnoDB: broken LRU dump file," + " buffer pool preload not done\n"); goto end; } @@ -2501,6 +2774,14 @@ buf_LRU_file_restore(void) if (offset % 16 == 15) { os_aio_simulated_wake_handler_threads(); buf_flush_free_margins(FALSE); + /* skip loading of the rest of the file if we are + terminating anyway */ + if(srv_shutdown_state != SRV_SHUTDOWN_NONE) { + fprintf(stderr, + " InnoDB: stopped loading lru pages" + " because of server shutdown\n"); + break; + } } zip_size = fil_space_get_zip_size(space_id); diff --git a/dict/dict0boot.c b/dict/dict0boot.c index fd08fa759ee..4bf69fa4e0b 100644 --- a/dict/dict0boot.c +++ b/dict/dict0boot.c @@ -239,6 +239,166 @@ dict_hdr_create( return(TRUE); } +/*****************************************************************//** +Verifies the SYS_STATS table by scanning its clustered index. This +function may only be called at InnoDB startup time. 
+ +@return TRUE if SYS_STATS was verified successfully */ +UNIV_INTERN +ibool +dict_verify_xtradb_sys_stats(void) +/*==============================*/ +{ + dict_index_t* sys_stats_index; + ulint saved_srv_pass_corrupt_table = srv_pass_corrupt_table; + ibool result; + + sys_stats_index = dict_table_get_first_index(dict_sys->sys_stats); + + /* Since this may be called only during server startup, avoid hitting + various asserts by using XtraDB pass_corrupt_table option. */ + srv_pass_corrupt_table = 1; + result = btr_validate_index(sys_stats_index, NULL); + srv_pass_corrupt_table = saved_srv_pass_corrupt_table; + + return result; +} + +/*****************************************************************//** +Creates the B-tree for the SYS_STATS clustered index, adds the XtraDB +mark and the id of the index to the dictionary header page. Rewrites +both passed args. */ +static +void +dict_create_xtradb_sys_stats( +/*=========================*/ + dict_hdr_t** dict_hdr, /*!< in/out: dictionary header */ + mtr_t* mtr) /*!< in/out: mtr */ +{ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(*dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, mtr); + mlog_write_ull(*dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, mtr); + } + mtr_commit(mtr); + /* restart mtr */ + mtr_start(mtr); + *dict_hdr = dict_hdr_get(mtr); +} + +/*****************************************************************//** +Create the table and index structure of SYS_STATS for the dictionary +cache and add it there. If called for the first time, also support +wrong root page id injection for testing purposes. 
*/ +static +void +dict_add_to_cache_xtradb_sys_stats( +/*===============================*/ + ibool first_time __attribute__((unused)), + /*!< in: first invocation flag. If + TRUE, optionally inject wrong root page + id */ + mem_heap_t* heap, /*!< in: memory heap for table/index + allocation */ + dict_hdr_t* dict_hdr, /*!< in: dictionary header */ + mtr_t* mtr) /*!< in: mtr */ +{ + dict_table_t* table; + dict_index_t* index; + ulint root_page_id; + ulint error; + + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif +#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 +#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + + root_page_id = mtr_read_ulint(dict_hdr + DICT_HDR_STATS, MLOG_4BYTES, + mtr); +#ifdef UNIV_DEBUG + if ((srv_sys_stats_root_page != 0) && first_time) + root_page_id = srv_sys_stats_root_page; +#endif + error = dict_index_add_to_cache(table, index, root_page_id, FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_empty(heap); +} + +/*****************************************************************//** +Discard the existing dictionary cache SYS_STATS information, 
create and +add it there anew. Does not touch the old SYS_STATS tablespace page +under the assumption that they are corrupted or overwritten for other +purposes. */ +UNIV_INTERN +void +dict_recreate_xtradb_sys_stats(void) +/*================================*/ +{ + mtr_t mtr; + dict_hdr_t* dict_hdr; + dict_index_t* sys_stats_clust_idx; + mem_heap_t* heap; + + heap = mem_heap_create(450); + + mutex_enter(&(dict_sys->mutex)); + + sys_stats_clust_idx = dict_table_get_first_index(dict_sys->sys_stats); + dict_index_remove_from_cache(dict_sys->sys_stats, sys_stats_clust_idx); + + dict_table_remove_from_cache(dict_sys->sys_stats); + + dict_sys->sys_stats = NULL; + + mtr_start(&mtr); + + dict_hdr = dict_hdr_get(&mtr); + + dict_create_xtradb_sys_stats(&dict_hdr, &mtr); + dict_add_to_cache_xtradb_sys_stats(FALSE, heap, dict_hdr, &mtr); + + mem_heap_free(heap); + + mtr_commit(&mtr); + + mutex_exit(&(dict_sys->mutex)); +} + /*****************************************************************//** Initializes the data dictionary memory structures when the database is started. This function is also called when the data dictionary is created. */ @@ -254,13 +414,13 @@ dict_boot(void) mtr_t mtr; ulint error; + heap = mem_heap_create(450); + mtr_start(&mtr); /* Create the hash tables etc. 
*/ dict_init(); - heap = mem_heap_create(450); - mutex_enter(&(dict_sys->mutex)); /* Get the dictionary header */ @@ -268,25 +428,9 @@ dict_boot(void) if (mach_read_from_8(dict_hdr + DICT_HDR_XTRADB_MARK) != DICT_HDR_XTRADB_FLAG) { - /* not extended yet by XtraDB, need to be extended */ - ulint root_page_no; - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_STATS_ID, - dict_ind_redundant, &mtr); - if (root_page_no == FIL_NULL) { - fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); - srv_use_sys_stats_table = FALSE; - } else { - mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, - MLOG_4BYTES, &mtr); - mlog_write_ull(dict_hdr + DICT_HDR_XTRADB_MARK, - DICT_HDR_XTRADB_FLAG, &mtr); - } - mtr_commit(&mtr); - /* restart mtr */ - mtr_start(&mtr); - dict_hdr = dict_hdr_get(&mtr); + /* not extended yet by XtraDB, need to be extended */ + dict_create_xtradb_sys_stats(&dict_hdr, &mtr); } /* Because we only write new row ids to disk-based data structure @@ -465,42 +609,7 @@ dict_boot(void) FALSE); ut_a(error == DB_SUCCESS); - /*-------------------------*/ - table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); - table->n_mysql_handles_opened = 1; /* for pin */ - - dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); - - /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ -#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 -#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" -#endif -#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 -#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" -#endif - - table->id = DICT_STATS_ID; - dict_table_add_to_cache(table, heap); - dict_sys->sys_stats = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_STATS", "CLUST_IND", - 
DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "INDEX_ID", 0); - dict_mem_index_add_field(index, "KEY_COLS", 0); - - index->id = DICT_STATS_ID; - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_STATS, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); + dict_add_to_cache_xtradb_sys_stats(TRUE, heap, dict_hdr, &mtr); mem_heap_free(heap); diff --git a/dict/dict0dict.c b/dict/dict0dict.c index 43f28751a31..4c84a22c4b5 100644 --- a/dict/dict0dict.c +++ b/dict/dict0dict.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -56,6 +56,8 @@ UNIV_INTERN dict_index_t* dict_ind_compact; #include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str()*/ #include "row0upd.h" #include "srv0start.h" /* SRV_LOG_SPACE_FIRST_ID */ +#include "m_string.h" +#include "my_sys.h" #include @@ -2397,6 +2399,8 @@ dict_foreign_free( /*==============*/ dict_foreign_t* foreign) /*!< in, own: foreign key struct */ { + ut_a(foreign->foreign_table->n_foreign_key_checks_running == 0); + mem_heap_free(foreign->heap); } @@ -4378,25 +4382,32 @@ dict_reload_statistics( heap = mem_heap_create(1000); while (index) { + mtr_t mtr; + if (table->is_corrupt) { ut_a(srv_pass_corrupt_table); mem_heap_free(heap); return(FALSE); } - size = btr_get_size(index, BTR_TOTAL_SIZE); + mtr_start(&mtr); + mtr_s_lock(dict_index_get_lock(index), &mtr); + + size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); index->stat_index_size = size; *sum_of_index_sizes += size; - size = btr_get_size(index, BTR_N_LEAF_PAGES); + size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr); if (size == 0) { /* The root node of the tree is a leaf */ size = 1; } + mtr_commit(&mtr); + index->stat_n_leaf_pages = size; /*===========================================*/ @@ -4728,6 +4739,7 @@ dict_update_statistics( (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO && dict_index_is_clust(index)))) { + mtr_t mtr; ulint size; if (table->is_corrupt) { @@ -4736,15 +4748,24 @@ dict_update_statistics( return; } - size = btr_get_size(index, BTR_TOTAL_SIZE); + mtr_start(&mtr); + mtr_s_lock(dict_index_get_lock(index), &mtr); - 
index->stat_index_size = size; + size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); - sum_of_index_sizes += size; + if (size != ULINT_UNDEFINED) { + sum_of_index_sizes += size; + index->stat_index_size = size; + size = btr_get_size( + index, BTR_N_LEAF_PAGES, &mtr); + } - size = btr_get_size(index, BTR_N_LEAF_PAGES); + mtr_commit(&mtr); - if (size == 0) { + switch (size) { + case ULINT_UNDEFINED: + goto fake_statistics; + case 0: /* The root node of the tree is a leaf */ size = 1; } @@ -4761,6 +4782,7 @@ dict_update_statistics( various means, also via secondary indexes. */ ulint i; +fake_statistics: sum_of_index_sizes++; index->stat_index_size = index->stat_n_leaf_pages = 1; diff --git a/dict/dict0load.c b/dict/dict0load.c index 0e7c777ea1f..6029e6f0070 100644 --- a/dict/dict0load.c +++ b/dict/dict0load.c @@ -2138,8 +2138,9 @@ static void dict_load_foreign_cols( /*===================*/ - const char* id, /*!< in: foreign constraint id as a - null-terminated string */ + const char* id, /*!< in: foreign constraint id, not + necessary '\0'-terminated */ + ulint id_len, /*!< in: id length */ dict_foreign_t* foreign)/*!< in: foreign constraint object */ { dict_table_t* sys_foreign_cols; @@ -2169,7 +2170,7 @@ dict_load_foreign_cols( tuple = dtuple_create(foreign->heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - dfield_set_data(dfield, id, ut_strlen(id)); + dfield_set_data(dfield, id, id_len); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, @@ -2182,7 +2183,7 @@ dict_load_foreign_cols( ut_a(!rec_get_deleted_flag(rec, 0)); field = rec_get_nth_field_old(rec, 0, &len); - ut_a(len == ut_strlen(id)); + ut_a(len == id_len); ut_a(ut_memcmp(id, field, len) == 0); field = rec_get_nth_field_old(rec, 1, &len); @@ -2211,8 +2212,9 @@ static ulint dict_load_foreign( /*==============*/ - const char* id, /*!< in: foreign constraint id as a - null-terminated string */ + const char* id, /*!< in: foreign constraint id, not + necessary 
'\0'-terminated */ + ulint id_len, /*!< in: id length */ ibool check_charsets, /*!< in: TRUE=check charset compatibility */ ibool check_recursive) @@ -2248,7 +2250,7 @@ dict_load_foreign( tuple = dtuple_create(heap2, 1); dfield = dtuple_get_nth_field(tuple, 0); - dfield_set_data(dfield, id, ut_strlen(id)); + dfield_set_data(dfield, id, id_len); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, @@ -2260,8 +2262,8 @@ dict_load_foreign( /* Not found */ fprintf(stderr, - "InnoDB: Error A: cannot load foreign constraint %s\n", - id); + "InnoDB: Error A: cannot load foreign constraint " + "%.*s\n", (int) id_len, id); btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2273,11 +2275,11 @@ dict_load_foreign( field = rec_get_nth_field_old(rec, 0, &len); /* Check if the id in record is the searched one */ - if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { + if (len != id_len || ut_memcmp(id, field, len) != 0) { fprintf(stderr, - "InnoDB: Error B: cannot load foreign constraint %s\n", - id); + "InnoDB: Error B: cannot load foreign constraint " + "%.*s\n", (int) id_len, id); btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2303,7 +2305,7 @@ dict_load_foreign( foreign->type = (unsigned int) (n_fields_and_type >> 24); foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); - foreign->id = mem_heap_strdup(foreign->heap, id); + foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); field = rec_get_nth_field_old(rec, 3, &len); @@ -2319,7 +2321,7 @@ dict_load_foreign( btr_pcur_close(&pcur); mtr_commit(&mtr); - dict_load_foreign_cols(id, foreign); + dict_load_foreign_cols(id, id_len, foreign); ref_table = dict_table_check_if_in_cache_low( foreign->referenced_table_name_lookup); @@ -2398,8 +2400,8 @@ dict_load_foreigns( ibool check_charsets) /*!< in: TRUE=check charset compatibility */ { + char tuple_buf[DTUPLE_EST_ALLOC(1)]; btr_pcur_t pcur; - mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; dict_index_t* 
sec_index; @@ -2407,7 +2409,6 @@ dict_load_foreigns( const rec_t* rec; const byte* field; ulint len; - char* id ; ulint err; mtr_t mtr; @@ -2434,9 +2435,8 @@ dict_load_foreigns( sec_index = dict_table_get_next_index( dict_table_get_first_index(sys_foreign)); start_load: - heap = mem_heap_create(256); - tuple = dtuple_create(heap, 1); + tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, table_name, ut_strlen(table_name)); @@ -2490,7 +2490,6 @@ loop: /* Now we get a foreign key constraint id */ field = rec_get_nth_field_old(rec, 1, &len); - id = mem_heap_strdupl(heap, (char*) field, len); btr_pcur_store_position(&pcur, &mtr); @@ -2498,11 +2497,11 @@ loop: /* Load the foreign constraint definition to the dictionary cache */ - err = dict_load_foreign(id, check_charsets, check_recursive); + err = dict_load_foreign((char*) field, len, check_charsets, + check_recursive); if (err != DB_SUCCESS) { btr_pcur_close(&pcur); - mem_heap_free(heap); return(err); } @@ -2518,7 +2517,6 @@ next_rec: load_next_index: btr_pcur_close(&pcur); mtr_commit(&mtr); - mem_heap_free(heap); sec_index = dict_table_get_next_index(sec_index); diff --git a/fil/fil0fil.c b/fil/fil0fil.c index 649b6c36bcd..66cbdf51140 100644 --- a/fil/fil0fil.c +++ b/fil/fil0fil.c @@ -193,7 +193,7 @@ struct fil_space_struct { .ibd file of tablespace and want to stop temporarily posting of new i/o requests on the file */ - ibool stop_ibuf_merges; + ibool stop_new_ops; /*!< we set this TRUE when we start deleting a single-table tablespace */ ibool is_being_deleted; @@ -218,12 +218,13 @@ struct fil_space_struct { ulint n_pending_flushes; /*!< this is positive when flushing the tablespace to disk; dropping of the tablespace is forbidden if this is positive */ - ulint n_pending_ibuf_merges;/*!< this is positive - when merging insert buffer entries to - a page so that we may need to access - the ibuf bitmap page in the - tablespade: dropping of 
the tablespace - is forbidden if this is positive */ + ulint n_pending_ops;/*!< this is positive when we + have pending operations against this + tablespace. The pending operations can + be ibuf merges or lock validation code + trying to read a block. + Dropping of the tablespace is forbidden + if this is positive */ hash_node_t hash; /*!< hash chain node */ hash_node_t name_hash;/*!< hash chain the name_hash table */ #ifndef UNIV_HOTBACKUP @@ -974,11 +975,6 @@ retry: return; } - if (fil_system->n_open < fil_system->max_n_open) { - - return; - } - space = fil_space_get_by_id(space_id); if (space != NULL && space->stop_ios) { @@ -995,6 +991,25 @@ retry: mutex_exit(&fil_system->mutex); +#ifndef UNIV_HOTBACKUP + + /* Wake the i/o-handler threads to make sure pending + i/o's are performed */ + os_aio_simulated_wake_handler_threads(); + + /* The sleep here is just to give IO helper threads a + bit of time to do some work. It is not required that + all IO related to the tablespace being renamed must + be flushed here as we do fil_flush() in + fil_rename_tablespace() as well. 
*/ + os_thread_sleep(20000); + +#endif /* UNIV_HOTBACKUP */ + + /* Flush tablespaces so that we can close modified + files in the LRU list */ + fil_flush_file_spaces(FIL_TABLESPACE); + os_thread_sleep(20000); count2++; @@ -1002,6 +1017,11 @@ retry: goto retry; } + if (fil_system->n_open < fil_system->max_n_open) { + + return; + } + /* If the file is already open, no need to do anything; if the space does not exist, we handle the situation in the function which called this function */ @@ -1274,7 +1294,7 @@ try_again: } space->stop_ios = FALSE; - space->stop_ibuf_merges = FALSE; + space->stop_new_ops = FALSE; space->is_being_deleted = FALSE; space->purpose = purpose; space->size = 0; @@ -1283,7 +1303,7 @@ try_again: space->n_reserved_extents = 0; space->n_pending_flushes = 0; - space->n_pending_ibuf_merges = 0; + space->n_pending_ops = 0; UT_LIST_INIT(space->chain); space->magic_n = FIL_SPACE_MAGIC_N; @@ -1896,13 +1916,12 @@ fil_read_first_page( #ifndef UNIV_HOTBACKUP /*******************************************************************//** -Increments the count of pending insert buffer page merges, if space is not -being deleted. -@return TRUE if being deleted, and ibuf merges should be skipped */ +Increments the count of pending operation, if space is not being deleted. 
+@return TRUE if being deleted, and operation should be skipped */ UNIV_INTERN ibool -fil_inc_pending_ibuf_merges( -/*========================*/ +fil_inc_pending_ops( +/*================*/ ulint id) /*!< in: space id */ { fil_space_t* space; @@ -1918,13 +1937,13 @@ fil_inc_pending_ibuf_merges( (ulong) id); } - if (space == NULL || space->stop_ibuf_merges) { + if (space == NULL || space->stop_new_ops) { mutex_exit(&fil_system->mutex); return(TRUE); } - space->n_pending_ibuf_merges++; + space->n_pending_ops++; mutex_exit(&fil_system->mutex); @@ -1932,11 +1951,11 @@ fil_inc_pending_ibuf_merges( } /*******************************************************************//** -Decrements the count of pending insert buffer page merges. */ +Decrements the count of pending operations. */ UNIV_INTERN void -fil_decr_pending_ibuf_merges( -/*=========================*/ +fil_decr_pending_ops( +/*=================*/ ulint id) /*!< in: space id */ { fil_space_t* space; @@ -1947,13 +1966,13 @@ fil_decr_pending_ibuf_merges( if (space == NULL) { fprintf(stderr, - "InnoDB: Error: decrementing ibuf merge of a" - " dropped tablespace %lu\n", + "InnoDB: Error: decrementing pending operation" + " of a dropped tablespace %lu\n", (ulong) id); } if (space != NULL) { - space->n_pending_ibuf_merges--; + space->n_pending_ops--; } mutex_exit(&fil_system->mutex); @@ -2164,7 +2183,7 @@ fil_op_log_parse_or_replay( switch (type) { case MLOG_FILE_DELETE: if (fil_tablespace_exists_in_mem(space_id)) { - ut_a(fil_delete_tablespace(space_id)); + ut_a(fil_delete_tablespace(space_id, TRUE)); } break; @@ -2234,7 +2253,9 @@ UNIV_INTERN ibool fil_delete_tablespace( /*==================*/ - ulint id) /*!< in: space id */ + ulint id, /*!< in: space id */ + ibool evict_all) /*!< in: TRUE if we want all pages + evicted from LRU. 
*/ { ibool success; fil_space_t* space; @@ -2243,15 +2264,15 @@ fil_delete_tablespace( char* path; ut_a(id != 0); -stop_ibuf_merges: +stop_new_ops: mutex_enter(&fil_system->mutex); space = fil_space_get_by_id(id); if (space != NULL) { - space->stop_ibuf_merges = TRUE; + space->stop_new_ops = TRUE; - if (space->n_pending_ibuf_merges == 0) { + if (space->n_pending_ops == 0) { mutex_exit(&fil_system->mutex); count = 0; @@ -2265,9 +2286,10 @@ stop_ibuf_merges: ut_print_filename(stderr, space->name); fprintf(stderr, ",\n" "InnoDB: but there are %lu pending" - " ibuf merges on it.\n" + " operations (most likely ibuf merges)" + " on it.\n" "InnoDB: Loop %lu.\n", - (ulong) space->n_pending_ibuf_merges, + (ulong) space->n_pending_ops, (ulong) count); } @@ -2276,7 +2298,7 @@ stop_ibuf_merges: os_thread_sleep(20000); count++; - goto stop_ibuf_merges; + goto stop_new_ops; } } @@ -2302,7 +2324,7 @@ try_again: } ut_a(space); - ut_a(space->n_pending_ibuf_merges == 0); + ut_a(space->n_pending_ops == 0); space->is_being_deleted = TRUE; @@ -2358,7 +2380,11 @@ try_again: if (srv_lazy_drop_table) { buf_LRU_mark_space_was_deleted(id); } else { - buf_LRU_invalidate_tablespace(id); + buf_LRU_flush_or_remove_pages( + id, evict_all + ? 
BUF_REMOVE_ALL_NO_WRITE + : BUF_REMOVE_FLUSH_NO_WRITE); + } #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ @@ -2447,7 +2473,7 @@ fil_discard_tablespace( { ibool success; - success = fil_delete_tablespace(id); + success = fil_delete_tablespace(id, TRUE); if (!success) { fprintf(stderr, @@ -2579,7 +2605,7 @@ fil_rename_tablespace( retry: count++; - if (count > 1000) { + if (!(count % 1000)) { ut_print_timestamp(stderr); fputs(" InnoDB: Warning: problems renaming ", stderr); ut_print_filename(stderr, old_name); @@ -3905,7 +3931,7 @@ convert_err_exit: level = btr_page_get_level(page, &mtr); - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr); + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr, &mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); btr_page_create(new_block, new_page_zip, index, level, &mtr); @@ -3953,7 +3979,7 @@ convert_err_exit: split_rec = page_get_middle_rec(page); new_block = btr_page_alloc(index, page_no + 1, FSP_UP, - btr_page_get_level(page, &mtr), &mtr); + btr_page_get_level(page, &mtr), &mtr, &mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); btr_page_create(new_block, new_page_zip, index, diff --git a/fsp/fsp0fsp.c b/fsp/fsp0fsp.c index 6102c5decd8..858721e62b9 100644 --- a/fsp/fsp0fsp.c +++ b/fsp/fsp0fsp.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,15 +211,13 @@ fseg_n_reserved_pages_low( /********************************************************************//** Marks a page used. 
The page must reside within the extents of the given segment. */ -static +static __attribute__((nonnull)) void fseg_mark_page_used( /*================*/ fseg_inode_t* seg_inode,/*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ ulint page, /*!< in: page offset */ + xdes_t* descr, /* extent descriptor */ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** Returns the first extent descriptor for a segment. We think of the extent @@ -250,28 +248,38 @@ fsp_fill_free_list( descriptor page and ibuf bitmap page; then we do not allocate more extents */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr); /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull)); /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. 
-@return the allocated page number, FIL_NULL if no page could be allocated */ +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ static -ulint +buf_block_t* fseg_alloc_free_page_low( /*=====================*/ ulint space, /*!< in: space */ ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint hint, /*!< in: hint of which page would be desirable */ + fseg_inode_t* seg_inode, /*!< in/out: segment inode */ + ulint hint, /*!< in: hint of which page would be + desirable */ byte direction, /*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page should be initialized. + If init_mtr!=mtr, but the page is already + latched in mtr, do not initialize the page. */ + __attribute__((warn_unused_result, nonnull)); #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -641,23 +649,22 @@ xdes_calc_descriptor_index( /********************************************************************//** Gets pointer to a the extent descriptor of a page. The page where the extent -descriptor resides is x-locked. If the page offset is equal to the free limit -of the space, adds new extents from above the free limit to the space free -list, if not free limit == space size. This adding is necessary to make the -descriptor defined, as they are uninitialized above the free limit. +descriptor resides is x-locked. This function no longer extends the data +file. 
+@return pointer to the extent descriptor, NULL if the page does not -exist in the space or if the offset exceeds the free limit */ -UNIV_INLINE +exist in the space or if the offset is >= the free limit */ +UNIV_INLINE __attribute__((nonnull, warn_unused_result)) xdes_t* xdes_get_descriptor_with_space_hdr( /*===============================*/ - fsp_header_t* sp_header,/*!< in/out: space header, x-latched */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset; - if equal to the free limit, - we try to add new extents to - the space free list */ - mtr_t* mtr) /*!< in: mtr handle */ + fsp_header_t* sp_header, /*!< in/out: space header, x-latched + in mtr */ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: page offset; if it is + >= the free limit or the + space size, NULL is returned + (no extents are added) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint limit; ulint size; @@ -665,11 +672,9 @@ xdes_get_descriptor_with_space_hdr( ulint descr_page_no; page_t* descr_page; - ut_ad(mtr); ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET); /* Read free limit and space size */ limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT); @@ -677,19 +682,10 @@ xdes_get_descriptor_with_space_hdr( zip_size = dict_table_flags_to_zip_size( mach_read_from_4(sp_header + FSP_SPACE_FLAGS)); - /* If offset is >= size or > limit, return NULL */ - - if ((offset >= size) || (offset > limit)) { - + if ((offset >= size) || (offset >= limit)) { return(NULL); } - /* If offset is == limit, fill free list of the space.
*/ - - if (offset == limit) { - fsp_fill_free_list(FALSE, space, sp_header, mtr); - } - descr_page_no = xdes_calc_descriptor_page(zip_size, offset); if (descr_page_no == 0) { @@ -719,7 +715,7 @@ is necessary to make the descriptor defined, as they are uninitialized above the free limit. @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -static +static __attribute__((nonnull, warn_unused_result)) xdes_t* xdes_get_descriptor( /*================*/ @@ -728,7 +724,7 @@ xdes_get_descriptor( or 0 for uncompressed pages */ ulint offset, /*!< in: page offset; if equal to the free limit, we try to add new extents to the space free list */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { buf_block_t* block; fsp_header_t* sp_header; @@ -1112,14 +1108,14 @@ fsp_header_get_tablespace_size(void) Tries to extend a single-table tablespace so that a page would fit in the data file. @return TRUE if success */ -static +static __attribute__((nonnull, warn_unused_result)) ibool fsp_try_extend_data_file_with_pages( /*================================*/ ulint space, /*!< in: space */ ulint page_no, /*!< in: page number */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr) /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ibool success; ulint actual_size; @@ -1144,7 +1140,7 @@ fsp_try_extend_data_file_with_pages( /***********************************************************************//** Tries to extend the last data file of a tablespace if it is auto-extending. 
@return FALSE if not auto-extending */ -static +static __attribute__((nonnull)) ibool fsp_try_extend_data_file( /*=====================*/ @@ -1154,8 +1150,8 @@ fsp_try_extend_data_file( the actual file size rounded down to megabyte */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr) /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint size; ulint zip_size; @@ -1291,7 +1287,7 @@ fsp_fill_free_list( then we do not allocate more extents */ ulint space, /*!< in: space */ fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint limit; ulint size; @@ -1490,29 +1486,120 @@ fsp_alloc_free_extent( } /**********************************************************************//** -Allocates a single free page from a space. The page is marked as used. -@return the page offset, FIL_NULL if no page could be allocated */ +Allocates a single free page from a space. 
*/ +static __attribute__((nonnull)) +void +fsp_alloc_from_free_frag( +/*=====================*/ + fsp_header_t* header, /*!< in/out: tablespace header */ + xdes_t* descr, /*!< in/out: extent descriptor */ + ulint bit, /*!< in: slot to allocate in the extent */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + ulint frag_n_used; + + ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); + ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr)); + xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); + + /* Update the FRAG_N_USED field */ + frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, + mtr); + frag_n_used++; + mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, + mtr); + if (xdes_is_full(descr, mtr)) { + /* The fragment is full: move it to another list */ + flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, + mtr); + xdes_set_state(descr, XDES_FULL_FRAG, mtr); + + flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, + mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, + frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, + mtr); + } +} + +/**********************************************************************//** +Gets a buffer block for an allocated page. + +NOTE: If init_mtr != mtr, the block will only be initialized if it was +not previously x-latched. It is assumed that the block has been +x-latched only by mtr, and freed in mtr in that case. 
+ +@return block, initialized if init_mtr==mtr +or rw_lock_x_lock_count(&block->lock) == 1 */ static -ulint +buf_block_t* +fsp_page_create( +/*============*/ + ulint space, /*!< in: space id of the allocated page */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number of the allocated page */ + mtr_t* mtr, /*!< in: mini-transaction of the allocation */ + mtr_t* init_mtr) /*!< in: mini-transaction for initializing + the page */ +{ + buf_block_t* block + = buf_page_create(space, page_no, zip_size, init_mtr); +#ifdef UNIV_SYNC_DEBUG + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX) + == rw_lock_own(&block->lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + + /* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */ + rw_lock_x_lock(&block->lock); + mutex_enter(&block->mutex); + buf_block_buf_fix_inc(block, __FILE__, __LINE__); + mutex_exit(&block->mutex); + mtr_memo_push(init_mtr, block, MTR_MEMO_PAGE_X_FIX); + + if (init_mtr == mtr + || rw_lock_get_x_lock_count(&block->lock) == 1) { + + /* Initialize the page, unless it was already + X-latched in mtr. (In this case, we would want to + allocate another page that has not been freed in mtr.) */ + ut_ad(init_mtr == mtr + || !mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + + fsp_init_file_page(block, init_mtr); + } + + return(block); +} + +/**********************************************************************//** +Allocates a single free page from a space. The page is marked as used. 
+@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ +static __attribute__((nonnull, warn_unused_result)) +buf_block_t* fsp_alloc_free_page( /*================*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mini-transaction in which the + page should be initialized + (may be the same as mtr) */ { fsp_header_t* header; fil_addr_t first; xdes_t* descr; - buf_block_t* block; ulint free; - ulint frag_n_used; ulint page_no; ulint space_size; - ibool success; ut_ad(mtr); + ut_ad(init_mtr); header = fsp_get_space_header(space, zip_size, mtr); @@ -1539,7 +1626,7 @@ fsp_alloc_free_page( if (descr == NULL) { /* No free space left */ - return(FIL_NULL); + return(NULL); } xdes_set_state(descr, XDES_FREE_FRAG, mtr); @@ -1584,50 +1671,18 @@ fsp_alloc_free_page( " space size %lu. 
Page no %lu.\n", (ulong) space, (ulong) space_size, (ulong) page_no); - return(FIL_NULL); + return(NULL); } - success = fsp_try_extend_data_file_with_pages(space, page_no, - header, mtr); - if (!success) { + if (!fsp_try_extend_data_file_with_pages(space, page_no, + header, mtr)) { /* No disk space left */ - return(FIL_NULL); + return(NULL); } } - xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); + fsp_alloc_from_free_frag(header, descr, free, mtr); - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } - - /* Initialize the allocated page to the buffer pool, so that it can - be obtained immediately with buf_page_get without need for a disk - read. */ - - buf_page_create(space, page_no, zip_size, mtr); - - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - /* Prior contents of the page should be ignored */ - fsp_init_file_page(block, mtr); - - return(page_no); + return(fsp_page_create(space, zip_size, page_no, mtr, init_mtr)); } /**********************************************************************//** @@ -1666,6 +1721,9 @@ fsp_free_page( fputs("InnoDB: Dump of descriptor: ", stderr); ut_print_buf(stderr, ((byte*)descr) - 50, 200); putc('\n', stderr); + /* Crash in debug version, so that we get a core dump + of this corruption. 
*/ + ut_ad(0); if (state == XDES_FREE) { /* We put here some fault tolerance: if the page @@ -1684,6 +1742,9 @@ fsp_free_page( "InnoDB: Dump of descriptor: ", (ulong) page); ut_print_buf(stderr, ((byte*)descr) - 50, 200); putc('\n', stderr); + /* Crash in debug version, so that we get a core dump + of this corruption. */ + ut_ad(0); /* We put here some fault tolerance: if the page is already free, return without doing anything! */ @@ -1718,6 +1779,8 @@ fsp_free_page( mtr); fsp_free_extent(space, zip_size, page, mtr); } + + mtr->n_freed_pages++; } /**********************************************************************//** @@ -1855,7 +1918,6 @@ fsp_alloc_seg_inode_page( fseg_inode_t* inode; buf_block_t* block; page_t* page; - ulint page_no; ulint space; ulint zip_size; ulint i; @@ -1866,15 +1928,15 @@ fsp_alloc_seg_inode_page( zip_size = dict_table_flags_to_zip_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - page_no = fsp_alloc_free_page(space, zip_size, 0, mtr); + block = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr); - if (page_no == FIL_NULL) { + if (block == NULL) { return(FALSE); } - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); block->check_index_page_at_flush = FALSE; @@ -2287,19 +2349,20 @@ fseg_create_general( } if (page == 0) { - page = fseg_alloc_free_page_low(space, zip_size, - inode, 0, FSP_UP, mtr); + block = fseg_alloc_free_page_low(space, zip_size, + inode, 0, FSP_UP, mtr, mtr); - if (page == FIL_NULL) { + if (block == NULL) { fsp_free_seg_inode(space, zip_size, inode, mtr); goto funct_exit; } - block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr); + ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); + header = byte_offset + buf_block_get_frame(block); - mlog_write_ulint(header - byte_offset + FIL_PAGE_TYPE, + mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr); } 
@@ -2476,8 +2539,10 @@ fseg_fill_free_list( Allocates a free extent for the segment: looks first in the free list of the segment, then tries to allocate from the space free list. NOTE that the extent returned still resides in the segment free list, it is not yet taken off it! -@return allocated extent, still placed in the segment free list, NULL -if could not be allocated */ +@retval NULL if no extent could be allocated +@retval xdes_t* descriptor of the allocated extent otherwise; +the extent is still placed in the segment free list and has +not yet been taken off it */ static xdes_t* fseg_alloc_free_extent( @@ -2529,22 +2594,30 @@ fseg_alloc_free_extent( Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. -@return the allocated page number, FIL_NULL if no page could be allocated */ +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ static -ulint +buf_block_t* fseg_alloc_free_page_low( /*=====================*/ ulint space, /*!< in: space */ ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint hint, /*!< in: hint of which page would be desirable */ + fseg_inode_t* seg_inode, /*!< in/out: segment inode */ + ulint hint, /*!< in: hint of which page would be + desirable */ byte direction, /*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page
should be initialized. + If init_mtr!=mtr, but the page is already + latched in mtr, do not initialize the page. */ { fsp_header_t* space_header; ulint space_size; @@ -2555,7 +2628,6 @@ fseg_alloc_free_page_low( ulint ret_page; /*!< the allocated page offset, FIL_NULL if could not be allocated */ xdes_t* ret_descr; /*!< the extent of the allocated page */ - ibool frag_page_allocated = FALSE; ibool success; ulint n; @@ -2577,6 +2649,7 @@ fseg_alloc_free_page_low( if (descr == NULL) { /* Hint outside space or too high above free limit: reset hint */ + /* The file space header page is always allocated. */ hint = 0; descr = xdes_get_descriptor(space, zip_size, hint, mtr); } @@ -2587,15 +2660,19 @@ fseg_alloc_free_page_low( && mach_read_from_8(descr + XDES_ID) == seg_id && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { - +take_hinted_page: /* 1. We can take the hinted page =================================*/ ret_descr = descr; ret_page = hint; + /* Skip the check for extending the tablespace. If the + page hint were not within the size of the tablespace, + we would have got (descr == NULL) above and reset the hint. */ + goto got_hinted_page; /*-----------------------------------------------------------*/ - } else if ((xdes_get_state(descr, mtr) == XDES_FREE) - && ((reserved - used) < reserved / FSEG_FILLFACTOR) - && (used >= FSEG_FRAG_LIMIT)) { + } else if (xdes_get_state(descr, mtr) == XDES_FREE + && reserved - used < reserved / FSEG_FILLFACTOR + && used >= FSEG_FRAG_LIMIT) { /* 2. 
We allocate the free extent from space and can take ========================================================= @@ -2613,7 +2690,7 @@ fseg_alloc_free_page_low( /* Try to fill the segment free list */ fseg_fill_free_list(seg_inode, space, zip_size, hint + FSP_EXTENT_SIZE, mtr); - ret_page = hint; + goto take_hinted_page; /*-----------------------------------------------------------*/ } else if ((direction != FSP_NO_DIR) && ((reserved - used) < reserved / FSEG_FILLFACTOR) @@ -2661,7 +2738,7 @@ fseg_alloc_free_page_low( first = flst_get_first(seg_inode + FSEG_FREE, mtr); } else { ut_error; - return(FIL_NULL); + return(NULL); } ret_descr = xdes_lst_get_descriptor(space, zip_size, @@ -2673,20 +2750,23 @@ fseg_alloc_free_page_low( } else if (used < FSEG_FRAG_LIMIT) { /* 6. We allocate an individual page from the space ===================================================*/ - ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr); - ret_descr = NULL; + buf_block_t* block = fsp_alloc_free_page( + space, zip_size, hint, mtr, init_mtr); - frag_page_allocated = TRUE; - - if (ret_page != FIL_NULL) { + if (block != NULL) { /* Put the page in the fragment page array of the segment */ n = fseg_find_free_frag_page_slot(seg_inode, mtr); - ut_a(n != FIL_NULL); + ut_a(n != ULINT_UNDEFINED); - fseg_set_nth_frag_page_no(seg_inode, n, ret_page, - mtr); + fseg_set_nth_frag_page_no( + seg_inode, n, buf_block_get_page_no(block), + mtr); } + + /* fsp_alloc_free_page() invoked fsp_init_file_page() + already. */ + return(block); /*-----------------------------------------------------------*/ } else { /* 7. We allocate a new extent and take its first page @@ -2704,7 +2784,7 @@ fseg_alloc_free_page_low( if (ret_page == FIL_NULL) { /* Page could not be allocated */ - return(FIL_NULL); + return(NULL); } if (space != 0) { @@ -2722,38 +2802,22 @@ fseg_alloc_free_page_low( " the space size %lu. 
Page no %lu.\n", (ulong) space, (ulong) space_size, (ulong) ret_page); - return(FIL_NULL); + return(NULL); } success = fsp_try_extend_data_file_with_pages( space, ret_page, space_header, mtr); if (!success) { /* No disk space left */ - return(FIL_NULL); + return(NULL); } } } - if (!frag_page_allocated) { - /* Initialize the allocated page to buffer pool, so that it - can be obtained immediately with buf_page_get without need - for a disk read */ - buf_block_t* block; - ulint zip_size = dict_table_flags_to_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - - block = buf_page_create(space, ret_page, zip_size, mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size, - ret_page, RW_X_LATCH, - mtr))) { - ut_error; - } - - /* The prior contents of the page should be ignored */ - fsp_init_file_page(block, mtr); - +got_hinted_page: + /* ret_descr == NULL if the block was allocated from free_frag + (XDES_FREE_FRAG) */ + if (ret_descr != NULL) { /* At this point we know the extent and the page offset. The extent is still in the appropriate list (FSEG_NOT_FULL or FSEG_FREE), and the page is not yet marked as used. */ @@ -2763,25 +2827,31 @@ fseg_alloc_free_page_low( ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT, ret_page % FSP_EXTENT_SIZE, mtr) == TRUE); - fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr); + fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr); } - buf_reset_check_index_page_at_flush(space, ret_page); - - return(ret_page); + return(fsp_page_create( + space, dict_table_flags_to_zip_size( + mach_read_from_4(FSP_SPACE_FLAGS + + space_header)), + ret_page, mtr, init_mtr)); } /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. 
-@return allocated page offset, FIL_NULL if no page could be allocated */ +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ UNIV_INTERN -ulint +buf_block_t* fseg_alloc_free_page_general( /*=========================*/ - fseg_header_t* seg_header,/*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ + fseg_header_t* seg_header,/*!< in/out: segment header */ + ulint hint, /*!< in: hint of which page would be + desirable */ byte direction,/*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which @@ -2792,15 +2862,18 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction handle */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page should be initialized. + If init_mtr!=mtr, but the page is already + latched in mtr, do not initialize the page. 
*/ { fseg_inode_t* inode; ulint space; ulint flags; ulint zip_size; rw_lock_t* latch; - ibool success; - ulint page_no; + buf_block_t* block; ulint n_reserved; space = page_get_space_id(page_align(seg_header)); @@ -2825,43 +2898,20 @@ fseg_alloc_free_page_general( inode = fseg_inode_get(seg_header, space, zip_size, mtr); - if (!has_done_reservation) { - success = fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr); - if (!success) { - return(FIL_NULL); - } + if (!has_done_reservation + && !fsp_reserve_free_extents(&n_reserved, space, 2, + FSP_NORMAL, mtr)) { + return(NULL); } - page_no = fseg_alloc_free_page_low(space, zip_size, - inode, hint, direction, mtr); + block = fseg_alloc_free_page_low(space, zip_size, + inode, hint, direction, + mtr, init_mtr); if (!has_done_reservation) { fil_space_release_free_extents(space, n_reserved); } - return(page_no); -} - -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@return allocated page offset, FIL_NULL if no page could be allocated */ -UNIV_INTERN -ulint -fseg_alloc_free_page( -/*=================*/ - fseg_header_t* seg_header,/*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ - byte direction,/*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /*!< in: mtr handle */ -{ - return(fseg_alloc_free_page_general(seg_header, hint, direction, - FALSE, mtr)); + return(block); } /**********************************************************************//** @@ -3183,27 +3233,21 @@ fsp_get_available_space_in_free_extents( /********************************************************************//** Marks a page used. 
The page must reside within the extents of the given segment. */ -static +static __attribute__((nonnull)) void fseg_mark_page_used( /*================*/ fseg_inode_t* seg_inode,/*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ ulint page, /*!< in: page offset */ + xdes_t* descr, /* extent descriptor */ mtr_t* mtr) /*!< in: mtr */ { - xdes_t* descr; ulint not_full_n_used; - ut_ad(seg_inode && mtr); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - descr = xdes_get_descriptor(space, zip_size, page, mtr); - ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr) == mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr)); @@ -3378,6 +3422,8 @@ crash: descr + XDES_FLST_NODE, mtr); fsp_free_extent(space, zip_size, page, mtr); } + + mtr->n_freed_pages++; } /**********************************************************************//** diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index 9b07c82c037..0d08cd2c1b6 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -26,8 +26,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -195,6 +195,9 @@ static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_large_prefix = FALSE; static my_bool innobase_use_sys_stats_table = FALSE; +#ifdef UNIV_DEBUG +static ulong innobase_sys_stats_root_page = 0; +#endif static my_bool innobase_buffer_pool_shm_checksum = TRUE; static uint innobase_buffer_pool_shm_key = 0; @@ -1174,6 +1177,9 @@ convert_error_code_to_mysql( case DB_OUT_OF_FILE_SPACE: return(HA_ERR_RECORD_FILE_FULL); + case DB_TABLE_IN_FK_CHECK: + return(HA_ERR_TABLE_IN_FK_CHECK); + case DB_TABLE_IS_BEING_USED: return(HA_ERR_WRONG_COMMAND); @@ -1623,70 +1629,87 @@ values we want to reserve for multi-value inserts e.g., INSERT INTO T VALUES(), (), (); -innobase_next_autoinc() will be called with increment set to -n * 3 where autoinc_lock_mode != TRADITIONAL because we want -to reserve 3 values for the multi-value INSERT above. +innobase_next_autoinc() will be called with increment set to 3 where +autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for +the multi-value INSERT above. @return the next value */ static ulonglong innobase_next_autoinc( /*==================*/ ulonglong current, /*!< in: Current value */ - ulonglong increment, /*!< in: increment current by */ + ulonglong need, /*!< in: count of values needed */ + ulonglong step, /*!< in: AUTOINC increment step */ ulonglong offset, /*!< in: AUTOINC offset */ ulonglong max_value) /*!< in: max value for type */ { ulonglong next_value; + ulonglong block = need * step; /* Should never be 0. 
*/ - ut_a(increment > 0); + ut_a(need > 0); + ut_a(block > 0); + ut_a(max_value > 0); + + /* Current value should never be greater than the maximum. */ + ut_a(current <= max_value); /* According to MySQL documentation, if the offset is greater than - the increment then the offset is ignored. */ - if (offset > increment) { + the step then the offset is ignored. */ + if (offset > block) { offset = 0; } - if (max_value <= current) { + /* Check for overflow. */ + if (block >= max_value + || offset > max_value + || current == max_value + || max_value - offset <= offset) { + next_value = max_value; - } else if (offset <= 1) { - /* Offset 0 and 1 are the same, because there must be at - least one node in the system. */ - if (max_value - current <= increment) { - next_value = max_value; - } else { - next_value = current + increment; - } - } else if (max_value > current) { - if (current > offset) { - next_value = ((current - offset) / increment) + 1; - } else { - next_value = ((offset - current) / increment) + 1; - } - - ut_a(increment > 0); - ut_a(next_value > 0); - - /* Check for multiplication overflow. */ - if (increment > (max_value / next_value)) { - - next_value = max_value; - } else { - next_value *= increment; - - ut_a(max_value >= next_value); - - /* Check for overflow. */ - if (max_value - next_value <= offset) { - next_value = max_value; - } else { - next_value += offset; - } - } } else { - next_value = max_value; + ut_a(max_value > current); + + ulonglong free = max_value - current; + + if (free < offset || free - offset <= block) { + next_value = max_value; + } else { + next_value = 0; + } } + if (next_value == 0) { + ulonglong next; + + if (current > offset) { + next = (current - offset) / step; + } else { + next = (offset - current) / step; + } + + ut_a(max_value > next); + next_value = next * step; + /* Check for multiplication overflow. 
*/ + ut_a(next_value >= next); + ut_a(max_value > next_value); + + /* Check for overflow */ + if (max_value - next_value >= block) { + + next_value += block; + + if (max_value - next_value >= offset) { + next_value += offset; + } else { + next_value = max_value; + } + } else { + next_value = max_value; + } + } + + ut_a(next_value != 0); ut_a(next_value <= max_value); return(next_value); @@ -2731,6 +2754,10 @@ mem_free_and_error: srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; +#ifdef UNIV_DEBUG + srv_sys_stats_root_page = innobase_sys_stats_root_page; +#endif + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -3905,52 +3932,140 @@ ha_innobase::primary_key_is_clustered() return(true); } +/** Always normalize table name to lower case on Windows */ +#ifdef __WIN__ +#define normalize_table_name(norm_name, name) \ + normalize_table_name_low(norm_name, name, TRUE) +#else +#define normalize_table_name(norm_name, name) \ + normalize_table_name_low(norm_name, name, FALSE) +#endif /* __WIN__ */ + /*****************************************************************//** Normalizes a table name string. A normalized name consists of the database name catenated to '/' and table name. An example: test/mytable. On Windows normalization puts both the database name and the -table name always to lower case. */ +table name always to lower case if "set_lower_case" is set to TRUE. 
*/ static void -normalize_table_name( -/*=================*/ +normalize_table_name_low( +/*=====================*/ char* norm_name, /*!< out: normalized name as a null-terminated string */ - const char* name) /*!< in: table name string */ + const char* name, /*!< in: table name string */ + ibool set_lower_case) /*!< in: TRUE if we want to set + name to lower case */ { char* name_ptr; char* db_ptr; + ulint db_len; char* ptr; /* Scan name from the end */ - ptr = strend(name)-1; + ptr = strend(name) - 1; + /* seek to the last path separator */ while (ptr >= name && *ptr != '\\' && *ptr != '/') { ptr--; } name_ptr = ptr + 1; - DBUG_ASSERT(ptr > name); + /* skip any number of path separators */ + while (ptr >= name && (*ptr == '\\' || *ptr == '/')) { + ptr--; + } - ptr--; + DBUG_ASSERT(ptr >= name); + /* seek to the last but one path separator or one char before + the beginning of name */ + db_len = 0; while (ptr >= name && *ptr != '\\' && *ptr != '/') { ptr--; + db_len++; } db_ptr = ptr + 1; - memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name)); + memcpy(norm_name, db_ptr, db_len); - norm_name[name_ptr - db_ptr - 1] = '/'; + norm_name[db_len] = '/'; -#ifdef __WIN__ - innobase_casedn_str(norm_name); -#endif + memcpy(norm_name + db_len + 1, name_ptr, strlen(name_ptr) + 1); + + if (set_lower_case) { + innobase_casedn_str(norm_name); + } } +#if !defined(DBUG_OFF) +/********************************************************************* +Test normalize_table_name_low(). 
*/ +static +void +test_normalize_table_name_low() +/*===========================*/ +{ + char norm_name[128]; + const char* test_data[][2] = { + /* input, expected result */ + {"./mysqltest/t1", "mysqltest/t1"}, + {"./test/#sql-842b_2", "test/#sql-842b_2"}, + {"./test/#sql-85a3_10", "test/#sql-85a3_10"}, + {"./test/#sql2-842b-2", "test/#sql2-842b-2"}, + {"./test/bug29807", "test/bug29807"}, + {"./test/foo", "test/foo"}, + {"./test/innodb_bug52663", "test/innodb_bug52663"}, + {"./test/t", "test/t"}, + {"./test/t1", "test/t1"}, + {"./test/t10", "test/t10"}, + {"/a/b/db/table", "db/table"}, + {"/a/b/db///////table", "db/table"}, + {"/a/b////db///////table", "db/table"}, + {"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"}, + {"db/table", "db/table"}, + {"ddd/t", "ddd/t"}, + {"d/ttt", "d/ttt"}, + {"d/t", "d/t"}, + {".\\mysqltest\\t1", "mysqltest/t1"}, + {".\\test\\#sql-842b_2", "test/#sql-842b_2"}, + {".\\test\\#sql-85a3_10", "test/#sql-85a3_10"}, + {".\\test\\#sql2-842b-2", "test/#sql2-842b-2"}, + {".\\test\\bug29807", "test/bug29807"}, + {".\\test\\foo", "test/foo"}, + {".\\test\\innodb_bug52663", "test/innodb_bug52663"}, + {".\\test\\t", "test/t"}, + {".\\test\\t1", "test/t1"}, + {".\\test\\t10", "test/t10"}, + {"C:\\a\\b\\db\\table", "db/table"}, + {"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"}, + {"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"}, + {"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"}, + {"db\\table", "db/table"}, + {"ddd\\t", "ddd/t"}, + {"d\\ttt", "d/ttt"}, + {"d\\t", "d/t"}, + }; + + for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) { + printf("test_normalize_table_name_low(): " + "testing \"%s\", expected \"%s\"... 
", + test_data[i][0], test_data[i][1]); + + normalize_table_name_low(norm_name, test_data[i][0], FALSE); + + if (strcmp(norm_name, test_data[i][1]) == 0) { + printf("ok\n"); + } else { + printf("got \"%s\"\n", norm_name); + ut_error; + } + } +} +#endif /* !DBUG_OFF */ + /********************************************************************//** Get the upper limit of the MySQL integral and floating-point type. @return maximum allowed value for the field */ @@ -4297,7 +4412,7 @@ ha_innobase::innobase_initialize_autoinc() nor the offset, so use a default increment of 1. */ auto_inc = innobase_next_autoinc( - read_auto_inc, 1, 1, col_max_value); + read_auto_inc, 1, 1, 0, col_max_value); break; } @@ -4352,6 +4467,8 @@ ha_innobase::open( THD* thd; ulint retries = 0; char* is_part = NULL; + ibool par_case_name_set = FALSE; + char par_case_name[MAX_FULL_NAME_LEN + 1]; DBUG_ENTER("ha_innobase::open"); @@ -4382,51 +4499,107 @@ ha_innobase::open( DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); } - /* Create buffers for packing the fields of a record. Why - table->reclength did not work here? Obviously, because char - fields when packed actually became 1 byte longer, when we also - stored the string length as the first byte. */ - - upd_and_key_val_buff_len = - table->s->reclength + table->s->max_key_length - + MAX_REF_PARTS * 3; - if (!(uchar*) my_multi_malloc(MYF(MY_WME), - &upd_buff, upd_and_key_val_buff_len, - &key_val_buff, upd_and_key_val_buff_len, - NullS)) { - free_share(share); - - DBUG_RETURN(1); - } + /* Will be allocated if it is needed in ::update_row() */ + upd_buf = NULL; + upd_buf_size = 0; /* We look for pattern #P# to see if the table is partitioned MySQL table. The retry logic for partitioned tables is a workaround for http://bugs.mysql.com/bug.php?id=33349. Look at support issue https://support.mysql.com/view.php?id=21080 for more details. 
*/ +#ifdef __WIN__ + is_part = strstr(norm_name, "#p#"); +#else is_part = strstr(norm_name, "#P#"); +#endif /* __WIN__ */ + retry: /* Get pointer to a table object in InnoDB dictionary cache */ ib_table = dict_table_get(norm_name, TRUE); - + if (srv_pass_corrupt_table <= 1 && ib_table && ib_table->is_corrupt) { free_share(share); - my_free(upd_buff); + my_free(upd_buf); + upd_buf = NULL; + upd_buf_size = 0; DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); } share->ib_table = ib_table; - - - - if (NULL == ib_table) { if (is_part && retries < 10) { - ++retries; - os_thread_sleep(100000); - goto retry; + /* MySQL partition engine hard codes the file name + separator as "#P#". The text case is fixed even if + lower_case_table_names is set to 1 or 2. This is true + for sub-partition names as well. InnoDB always + normalises file names to lower case on Windows, this + can potentially cause problems when copying/moving + tables between platforms. + + 1) If boot against an installation from Windows + platform, then its partition table name could + be all be in lower case in system tables. So we + will need to check lower case name when load table. + + 2) If we boot an installation from other case + sensitive platform in Windows, we might need to + check the existence of table name without lowering + case them in the system table. 
*/ + if (innobase_get_lower_case_table_names() == 1) { + + if (!par_case_name_set) { +#ifndef __WIN__ + /* Check for the table using lower + case name, including the partition + separator "P" */ + memcpy(par_case_name, norm_name, + strlen(norm_name)); + par_case_name[strlen(norm_name)] = 0; + innobase_casedn_str(par_case_name); +#else + /* On Windows platform, check + whether there exists table name in + system table whose name is + not being normalized to lower case */ + normalize_table_name_low( + par_case_name, name, FALSE); +#endif + par_case_name_set = TRUE; + } + + ib_table = dict_table_get( + par_case_name, FALSE); + } + if (!ib_table) { + ++retries; + os_thread_sleep(100000); + goto retry; + } else { +#ifndef __WIN__ + sql_print_warning("Partition table %s opened " + "after converting to lower " + "case. The table may have " + "been moved from a case " + "in-sensitive file system. " + "Please recreate table in " + "the current file system\n", + norm_name); +#else + sql_print_warning("Partition table %s opened " + "after skipping the step to " + "lower case the table name. " + "The table may have been " + "moved from a case sensitive " + "file system. 
Please " + "recreate table in the " + "current file system\n", + norm_name); +#endif + goto table_opened; + } } if (is_part) { @@ -4451,12 +4624,13 @@ retry: "how you can resolve the problem.\n", norm_name); free_share(share); - my_free(upd_buff); my_errno = ENOENT; DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } +table_opened: + if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) { sql_print_error("MySQL is trying to open a table handle but " "the .ibd file for\ntable %s does not exist.\n" @@ -4467,16 +4641,14 @@ retry: "how you can resolve the problem.\n", norm_name); free_share(share); - my_free(upd_buff); my_errno = ENOENT; dict_table_decrement_handle_count(ib_table, FALSE); DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } - prebuilt = row_create_prebuilt(ib_table); + prebuilt = row_create_prebuilt(ib_table, table->s->reclength); - prebuilt->mysql_row_len = table->s->reclength; prebuilt->default_rec = table->s->default_values; ut_ad(prebuilt->default_rec); @@ -4665,7 +4837,13 @@ ha_innobase::close(void) row_prebuilt_free(prebuilt, FALSE); - my_free(upd_buff); + if (upd_buf != NULL) { + ut_ad(upd_buf_size != 0); + my_free(upd_buf); + upd_buf = NULL; + upd_buf_size = 0; + } + free_share(share); /* Tell InnoDB server that there might be work for @@ -5759,15 +5937,16 @@ set_max_autoinc: if (auto_inc <= col_max_value) { ut_a(prebuilt->autoinc_increment > 0); - ulonglong need; ulonglong offset; + ulonglong increment; offset = prebuilt->autoinc_offset; - need = prebuilt->autoinc_increment; + increment = prebuilt->autoinc_increment; auto_inc = innobase_next_autoinc( auto_inc, - need, offset, col_max_value); + 1, increment, offset, + col_max_value); err = innobase_set_max_autoinc( auto_inc); @@ -5970,6 +6149,23 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); + if (upd_buf == NULL) { + ut_ad(upd_buf_size == 0); + + /* Create a buffer for packing the fields of a record. Why + table->reclength did not work here? 
Obviously, because char + fields when packed actually became 1 byte longer, when we also + stored the string length as the first byte. */ + + upd_buf_size = table->s->reclength + table->s->max_key_length + + MAX_REF_PARTS * 3; + upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME)); + if (upd_buf == NULL) { + upd_buf_size = 0; + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + } + ha_statistic_increment(&SSV::ha_update_count); if (share->ib_table->is_corrupt) { @@ -5986,11 +6182,10 @@ ha_innobase::update_row( } /* Build an update vector from the modified fields in the rows - (uses upd_buff of the handle) */ + (uses upd_buf of the handle) */ calc_row_difference(uvect, (uchar*) old_row, new_row, table, - upd_buff, (ulint)upd_and_key_val_buff_len, - prebuilt, user_thd); + upd_buf, upd_buf_size, prebuilt, user_thd); /* This is not a delete */ prebuilt->upd_node->is_delete = FALSE; @@ -6027,14 +6222,14 @@ ha_innobase::update_row( if (auto_inc <= col_max_value && auto_inc != 0) { - ulonglong need; ulonglong offset; + ulonglong increment; offset = prebuilt->autoinc_offset; - need = prebuilt->autoinc_increment; + increment = prebuilt->autoinc_increment; auto_inc = innobase_next_autoinc( - auto_inc, need, offset, col_max_value); + auto_inc, 1, increment, offset, col_max_value); error = innobase_set_max_autoinc(auto_inc); } @@ -6359,6 +6554,7 @@ ha_innobase::index_read( DBUG_ENTER("index_read"); ut_a(prebuilt->trx == thd_to_trx(user_thd)); + ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT); ha_statistic_increment(&SSV::ha_read_key_count); @@ -6391,8 +6587,7 @@ ha_innobase::index_read( row_sel_convert_mysql_key_to_innobase( prebuilt->search_tuple, - (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, + srch_key_val1, sizeof(srch_key_val1), index, (byte*) key_ptr, (ulint) key_len, @@ -7559,6 +7754,8 @@ ha_innobase::create( DBUG_RETURN(HA_ERR_TO_BIG_ROW); } + ut_a(strlen(name) < sizeof(name2)); + strcpy(name2, name); normalize_table_name(norm_name, name2); @@ -8002,6 +8199,11 
@@ ha_innobase::delete_table( DBUG_ENTER("ha_innobase::delete_table"); + DBUG_EXECUTE_IF( + "test_normalize_table_name_low", + test_normalize_table_name_low(); + ); + /* Strangely, MySQL passes the table name without the '.frm' extension, in contrast to ::create */ normalize_table_name(norm_name, name); @@ -8162,6 +8364,8 @@ innobase_rename_table( normalize_table_name(norm_to, to); normalize_table_name(norm_from, from); + DEBUG_SYNC_C("innodb_rename_table_ready"); + /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ @@ -8279,12 +8483,6 @@ ha_innobase::records_in_range( { KEY* key; dict_index_t* index; - uchar* key_val_buff2 = (uchar*) my_malloc( - table->s->reclength - + table->s->max_key_length + 100, - MYF(MY_FAE)); - ulint buff2_len = table->s->reclength - + table->s->max_key_length + 100; dtuple_t* range_start; dtuple_t* range_end; ib_int64_t n_rows; @@ -8293,7 +8491,6 @@ ha_innobase::records_in_range( mem_heap_t* heap; DBUG_ENTER("records_in_range"); - DBUG_ASSERT(min_key || max_key); ut_a(prebuilt->trx == thd_to_trx(ha_thd())); @@ -8336,8 +8533,8 @@ ha_innobase::records_in_range( dict_index_copy_types(range_end, index, key->key_parts); row_sel_convert_mysql_key_to_innobase( - range_start, (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, + range_start, + srch_key_val1, sizeof(srch_key_val1), index, (byte*) (min_key ? min_key->key : (const uchar*) 0), @@ -8348,8 +8545,9 @@ ha_innobase::records_in_range( : range_start->n_fields == 0); row_sel_convert_mysql_key_to_innobase( - range_end, (byte*) key_val_buff2, - buff2_len, index, + range_end, + srch_key_val2, sizeof(srch_key_val2), + index, (byte*) (max_key ? max_key->key : (const uchar*) 0), (ulint) (max_key ? 
max_key->length : 0), @@ -8376,7 +8574,6 @@ ha_innobase::records_in_range( mem_heap_free(heap); func_exit: - my_free(key_val_buff2); prebuilt->trx->op_info = (char*)""; @@ -10801,16 +10998,15 @@ ha_innobase::get_auto_increment( /* With old style AUTOINC locking we only update the table's AUTOINC counter after attempting to insert the row. */ if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) { - ulonglong need; ulonglong current; ulonglong next_value; current = *first_value > col_max_value ? autoinc : *first_value; - need = *nb_reserved_values * increment; /* Compute the last value in the interval */ next_value = innobase_next_autoinc( - current, need, offset, col_max_value); + current, *nb_reserved_values, increment, offset, + col_max_value); prebuilt->autoinc_last_value = next_value; @@ -12209,6 +12405,13 @@ static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, "So you should use ANALYZE TABLE command intentionally.", NULL, NULL, FALSE); +#ifdef UNIV_DEBUG +static MYSQL_SYSVAR_ULONG(sys_stats_root_page, innobase_sys_stats_root_page, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Override the SYS_STATS root page id, 0 = no override (for testing only)", + NULL, NULL, 0, 0, ULONG_MAX, 0); +#endif + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). " @@ -12426,6 +12629,13 @@ static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, "trigger a readahead.", NULL, NULL, 56, 0, 64, 0); +#ifdef UNIV_DEBUG +static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, + PLUGIN_VAR_RQCMDARG, + "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()", + NULL, NULL, 0, 0, 1024, 0); +#endif /* UNIV_DEBUG */ + static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The maximum size of the insert buffer. 
(in bytes)", @@ -12547,14 +12757,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method, "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)", NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib); -#ifdef UNIV_DEBUG -static MYSQL_SYSVAR_ULONG(flush_checkpoint_debug, srv_flush_checkpoint_debug, - PLUGIN_VAR_RQCMDARG, - "Debug flags for InnoDB flushing and checkpointing (0=none," - "1=stop preflush and checkpointing)", - NULL, NULL, 0, 0, 1, 0); -#endif - static MYSQL_SYSVAR_ULONG(import_table_from_xtrabackup, srv_expand_import, PLUGIN_VAR_RQCMDARG, "Enable/Disable converting automatically *.ibd files when import tablespace.", @@ -12660,6 +12862,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), MYSQL_SYSVAR(use_sys_stats_table), +#ifdef UNIV_DEBUG + MYSQL_SYSVAR(sys_stats_root_page), +#endif MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(adaptive_hash_index_partitions), @@ -12702,8 +12907,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(purge_batch_size), MYSQL_SYSVAR(rollback_segments), #ifdef UNIV_DEBUG - MYSQL_SYSVAR(flush_checkpoint_debug), -#endif + MYSQL_SYSVAR(trx_rseg_n_slots_debug), +#endif /* UNIV_DEBUG */ MYSQL_SYSVAR(corrupt_table_action), MYSQL_SYSVAR(lazy_drop_table), MYSQL_SYSVAR(fake_changes), diff --git a/handler/ha_innodb.h b/handler/ha_innodb.h index 9506f309827..39ef3631138 100644 --- a/handler/ha_innodb.h +++ b/handler/ha_innodb.h @@ -79,13 +79,14 @@ class ha_innobase: public handler INNOBASE_SHARE* share; /*!< information for MySQL table locking */ - uchar* upd_buff; /*!< buffer used in updates */ - uchar* key_val_buff; /*!< buffer used in converting + uchar* upd_buf; /*!< buffer used in updates */ + ulint upd_buf_size; /*!< the size of upd_buf in bytes */ + uchar 
srch_key_val1[REC_VERSION_56_MAX_INDEX_COL_LEN + 2]; + uchar srch_key_val2[REC_VERSION_56_MAX_INDEX_COL_LEN + 2]; + /*!< buffers used in converting search key values from MySQL format - to Innodb format */ - ulong upd_and_key_val_buff_len; - /* the length of each of the previous - two buffers */ + to InnoDB format. "+ 2" for the two + bytes where the length is stored */ Table_flags int_table_flags; uint primary_key; ulong start_of_scan; /*!< this is set to 1 when we are diff --git a/handler/handler0alter.cc b/handler/handler0alter.cc index 8c5783fa8b9..398e8bee425 100644 --- a/handler/handler0alter.cc +++ b/handler/handler0alter.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -877,6 +877,8 @@ ha_innobase::add_index( prebuilt->table, indexed_table, index, num_of_idx, table); + DBUG_EXECUTE_IF("crash_innodb_add_index_after", DBUG_SUICIDE();); + error_handling: /* After an error, remove all those index definitions from the dictionary which were defined. 
*/ @@ -1019,7 +1021,12 @@ ha_innobase::final_add_index( row_prebuilt_free(prebuilt, TRUE); error = row_merge_drop_table(trx, old_table); add->indexed_table->n_mysql_handles_opened++; - prebuilt = row_create_prebuilt(add->indexed_table); + prebuilt = row_create_prebuilt(add->indexed_table, + 0 /* XXX Do we know the mysql_row_len here? + Before the addition of this parameter to + row_create_prebuilt() the mysql_row_len + member was left 0 (from zalloc) in the + prebuilt object. */); } err = convert_error_code_to_mysql( @@ -1153,7 +1160,9 @@ ha_innobase::prepare_drop_index( goto func_exit; } + rw_lock_x_lock(dict_index_get_lock(index)); index->to_be_dropped = TRUE; + rw_lock_x_unlock(dict_index_get_lock(index)); } /* If FOREIGN_KEY_CHECKS = 1 you may not drop an index defined @@ -1272,7 +1281,9 @@ func_exit: = dict_table_get_first_index(prebuilt->table); do { + rw_lock_x_lock(dict_index_get_lock(index)); index->to_be_dropped = FALSE; + rw_lock_x_unlock(dict_index_get_lock(index)); index = dict_table_get_next_index(index); } while (index); } @@ -1338,7 +1349,9 @@ ha_innobase::final_drop_index( for (index = dict_table_get_first_index(prebuilt->table); index; index = dict_table_get_next_index(index)) { + rw_lock_x_lock(dict_index_get_lock(index)); index->to_be_dropped = FALSE; + rw_lock_x_unlock(dict_index_get_lock(index)); } goto func_exit; diff --git a/ibuf/ibuf0ibuf.c b/ibuf/ibuf0ibuf.c index 00b7ea7347d..562f207268a 100644 --- a/ibuf/ibuf0ibuf.c +++ b/ibuf/ibuf0ibuf.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -1523,7 +1523,7 @@ ibuf_add_ops( for (i = 0; i < IBUF_OP_COUNT; i++) { #ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_ulint(&arr[i], ops[i]); + (void) os_atomic_increment_ulint(&arr[i], ops[i]); #else /* HAVE_ATOMIC_BUILTINS */ arr[i] += ops[i]; #endif /* HAVE_ATOMIC_BUILTINS */ @@ -2222,14 +2222,14 @@ ibool ibuf_add_free_page(void) /*====================*/ { - mtr_t mtr; - page_t* header_page; - ulint flags; - ulint zip_size; - ulint page_no; - page_t* page; - page_t* root; - page_t* bitmap_page; + mtr_t mtr; + page_t* header_page; + ulint flags; + ulint zip_size; + buf_block_t* block; + page_t* page; + page_t* root; + page_t* bitmap_page; mtr_start(&mtr); @@ -2250,29 +2250,24 @@ ibuf_add_free_page(void) of a deadlock. This is the reason why we created a special ibuf header page apart from the ibuf tree. 
*/ - page_no = fseg_alloc_free_page( + block = fseg_alloc_free_page( header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, &mtr); - if (UNIV_UNLIKELY(page_no == FIL_NULL)) { + if (block == NULL) { mtr_commit(&mtr); return(FALSE); - } else { - buf_block_t* block = buf_page_get( - IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr); - - ibuf_enter(&mtr); - - mutex_enter(&ibuf_mutex); - - root = ibuf_tree_root_get(&mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - - page = buf_block_get_frame(block); } + ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); + ibuf_enter(&mtr); + mutex_enter(&ibuf_mutex); + root = ibuf_tree_root_get(&mtr); + + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); + page = buf_block_get_frame(block); + /* Add the page to the free list and update the ibuf size data */ flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, @@ -2288,12 +2283,13 @@ ibuf_add_free_page(void) (level 2 page) */ bitmap_page = ibuf_bitmap_get_map_page( - IBUF_SPACE_ID, page_no, zip_size, &mtr); + IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr); mutex_exit(&ibuf_mutex); ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr); + bitmap_page, buf_block_get_page_no(block), zip_size, + IBUF_BITMAP_IBUF, TRUE, &mtr); ibuf_mtr_commit(&mtr); @@ -2588,7 +2584,15 @@ ibuf_get_merge_page_nos_func( } else { rec_page_no = ibuf_rec_get_page_no(mtr, rec); rec_space_id = ibuf_rec_get_space(mtr, rec); - ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO); + /* In the system tablespace, the smallest + possible secondary index leaf page number is + bigger than IBUF_TREE_ROOT_PAGE_NO (4). In + other tablespaces, the clustered index tree is + created at page 3, which makes page 4 the + smallest possible secondary index leaf page + (and that only after DROP INDEX). 
*/ + ut_ad(rec_page_no + > IBUF_TREE_ROOT_PAGE_NO - (rec_space_id != 0)); } #ifdef UNIV_IBUF_DEBUG @@ -3920,6 +3924,7 @@ ibuf_insert_to_index_page_low( fputs("InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); + ut_ad(0); } /************************************************************************ @@ -3970,9 +3975,10 @@ ibuf_insert_to_index_page( "InnoDB: but the number of fields does not match!\n", stderr); dump: - buf_page_print(page, 0); + buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); dtuple_print(stderr, entry); + ut_ad(0); fputs("InnoDB: The table where where" " this index record belongs\n" @@ -4113,6 +4119,11 @@ ibuf_set_del_mark( TRUE, mtr); } } else { + const page_t* page + = page_cur_get_page(&page_cur); + const buf_block_t* block + = page_cur_get_block(&page_cur); + ut_print_timestamp(stderr); fputs(" InnoDB: unable to find a record to delete-mark\n", stderr); @@ -4121,10 +4132,14 @@ ibuf_set_del_mark( fputs("\n" "InnoDB: record ", stderr); rec_print(stderr, page_cur_get_rec(&page_cur), index); - putc('\n', stderr); - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); + fprintf(stderr, "\nspace %u offset %u" + " (%u records, index id %llu)\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + (unsigned) buf_block_get_space(block), + (unsigned) buf_block_get_page_no(block), + (unsigned) page_get_n_recs(page), + (ulonglong) btr_page_get_index_id(page)); ut_ad(0); } } @@ -4168,12 +4183,31 @@ ibuf_delete( offsets = rec_get_offsets( rec, index, offsets, ULINT_UNDEFINED, &heap); - /* Refuse to delete the last record. */ - ut_a(page_get_n_recs(page) > 1); + if (page_get_n_recs(page) <= 1 + || !(REC_INFO_DELETED_FLAG + & rec_get_info_bits(rec, page_is_comp(page)))) { + /* Refuse to purge the last record or a + record that has not been marked for deletion. 
*/ + ut_print_timestamp(stderr); + fputs(" InnoDB: unable to purge a record\n", + stderr); + fputs("InnoDB: tuple ", stderr); + dtuple_print(stderr, entry); + fputs("\n" + "InnoDB: record ", stderr); + rec_print_new(stderr, rec, offsets); + fprintf(stderr, "\nspace %u offset %u" + " (%u records, index id %llu)\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + (unsigned) buf_block_get_space(block), + (unsigned) buf_block_get_page_no(block), + (unsigned) page_get_n_recs(page), + (ulonglong) btr_page_get_index_id(page)); - /* The record should have been marked for deletion. */ - ut_ad(REC_INFO_DELETED_FLAG - & rec_get_info_bits(rec, page_is_comp(page))); + ut_ad(0); + return; + } lock_update_delete(block, rec); @@ -4259,6 +4293,7 @@ ibuf_restore_pos( fprintf(stderr, "InnoDB: ibuf tree ok\n"); fflush(stderr); + ut_ad(0); } return(FALSE); @@ -4441,7 +4476,7 @@ ibuf_merge_or_delete_for_page( function. When the counter is > 0, that prevents tablespace from being dropped. */ - tablespace_being_deleted = fil_inc_pending_ibuf_merges(space); + tablespace_being_deleted = fil_inc_pending_ops(space); if (UNIV_UNLIKELY(tablespace_being_deleted)) { /* Do not try to read the bitmap page from space; @@ -4467,7 +4502,7 @@ ibuf_merge_or_delete_for_page( /* No inserts buffered for this page */ if (!tablespace_being_deleted) { - fil_decr_pending_ibuf_merges(space); + fil_decr_pending_ops(space); } return; @@ -4516,12 +4551,14 @@ ibuf_merge_or_delete_for_page( bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr); - buf_page_print(bitmap_page, 0); + buf_page_print(bitmap_page, 0, + BUF_PAGE_PRINT_NO_CRASH); ibuf_mtr_commit(&mtr); fputs("\nInnoDB: Dump of the page:\n", stderr); - buf_page_print(block->frame, 0); + buf_page_print(block->frame, 0, + BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, "InnoDB: Error: corruption in the tablespace." 
@@ -4541,6 +4578,7 @@ ibuf_merge_or_delete_for_page( (ulong) page_no, (ulong) fil_page_get_type(block->frame)); + ut_ad(0); } } @@ -4747,7 +4785,7 @@ reset_bit: mem_heap_free(heap); #ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_ulint(&ibuf->n_merges, 1); + (void) os_atomic_increment_ulint(&ibuf->n_merges, 1); ibuf_add_ops(ibuf->n_merged_ops, mops); ibuf_add_ops(ibuf->n_discarded_ops, dops); #else /* HAVE_ATOMIC_BUILTINS */ @@ -4763,7 +4801,7 @@ reset_bit: if (update_ibuf_bitmap && !tablespace_being_deleted) { - fil_decr_pending_ibuf_merges(space); + fil_decr_pending_ops(space); } #ifdef UNIV_IBUF_COUNT_DEBUG diff --git a/include/btr0btr.h b/include/btr0btr.h index 058fcfb64de..520527d0375 100644 --- a/include/btr0btr.h +++ b/include/btr0btr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -92,6 +92,26 @@ insert/delete buffer when the record is not in the buffer pool. */ buffer when the record is not in the buffer pool. */ #define BTR_DELETE 8192 +/**************************************************************//** +Report that an index page is corrupted. 
*/ +UNIV_INTERN +void +btr_corruption_report( +/*==================*/ + const buf_block_t* block, /*!< in: corrupted block */ + const dict_index_t* index) /*!< in: index tree */ + UNIV_COLD __attribute__((nonnull)); + +/** Assert that a B-tree page is not corrupted. +@param block buffer block containing a B-tree page +@param index the B-tree index */ +#define btr_assert_not_corrupted(block, index) \ + if ((ibool) !!page_is_comp(buf_block_get_frame(block)) \ + != dict_table_is_comp((index)->table)) { \ + btr_corruption_report(block, index); \ + ut_error; \ + } + #ifdef UNIV_BLOB_DEBUG # include "ut0rbt.h" /** An index->blobs entry for keeping track of off-page column references */ @@ -569,11 +589,14 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; - the page must not be empty: in record delete - use btr_discard_page if the page would become - empty */ - mtr_t* mtr); /*!< in: mtr */ + btr_cur_t* cursor, /*!< in/out: cursor on the page to merge + or lift; the page must not be empty: + when deleting records, use btr_discard_page() + if the page would become empty */ + ibool adjust, /*!< in: TRUE if should adjust the + cursor position even if compression occurs */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull)); /*************************************************************//** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot @@ -614,17 +637,23 @@ btr_parse_page_reorganize( #ifndef UNIV_HOTBACKUP /**************************************************************//** Gets the number of pages in a B-tree. 
-@return number of pages */ +@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ UNIV_INTERN ulint btr_get_size( /*=========*/ dict_index_t* index, /*!< in: index */ - ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ + ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ + mtr_t* mtr) /*!< in/out: mini-transaction where index + is s-latched */ + __attribute__((nonnull, warn_unused_result)); /**************************************************************//** Allocates a new file page to be used in an index tree. NOTE: we assume that the caller has made the reservation for free extents! -@return new allocated block, x-latched; NULL if out of space */ +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ UNIV_INTERN buf_block_t* btr_page_alloc( @@ -635,7 +664,12 @@ btr_page_alloc( page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mini-transaction + for x-latching and initializing + the page */ + __attribute__((nonnull, warn_unused_result)); /**************************************************************//** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. 
*/ diff --git a/include/btr0btr.ic b/include/btr0btr.ic index f446ecb69d3..53c8159c448 100644 --- a/include/btr0btr.ic +++ b/include/btr0btr.ic @@ -279,7 +279,7 @@ btr_node_ptr_get_child_page_no( "InnoDB: a nonsensical page number 0" " in a node ptr record at offset %lu\n", (ulong) page_offset(rec)); - buf_page_print(page_align(rec), 0); + buf_page_print(page_align(rec), 0, 0); } return(page_no); diff --git a/include/btr0cur.h b/include/btr0cur.h index 4f33aacc48e..cbc6103c2ee 100644 --- a/include/btr0cur.h +++ b/include/btr0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -36,6 +36,9 @@ Created 10/16/1994 Heikki Tuuri #define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ #define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the update vector or inserted entry */ +#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update() + must keep cursor position when + moving columns to big_rec */ #ifndef UNIV_HOTBACKUP #include "que0types.h" @@ -310,7 +313,9 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update */ + btr_cur_t* cursor, /*!< in/out: cursor on the record to update; + cursor may become invalid if *big_rec == NULL + || !(flags & BTR_KEEP_POS_FLAG) */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -364,10 +369,13 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in: cursor on the page to compress; + btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; cursor does not stay valid if compression occurs */ - mtr_t* mtr); /*!< in: mtr */ + ibool adjust, /*!< in: TRUE if should adjust the + cursor position even if compression occurs */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull)); /*******************************************************//** Removes the record on which the tree cursor is positioned. 
It is assumed that the mtr has an x-latch on the page where the cursor is positioned, @@ -492,6 +500,27 @@ btr_cur_disown_inherited_fields( const upd_t* update, /*!< in: update vector */ mtr_t* mtr) /*!< in/out: mini-transaction */ __attribute__((nonnull(2,3,4,5,6))); + +/** Operation code for btr_store_big_rec_extern_fields(). */ +enum blob_op { + /** Store off-page columns for a freshly inserted record */ + BTR_STORE_INSERT = 0, + /** Store off-page columns for an insert by update */ + BTR_STORE_INSERT_UPDATE, + /** Store off-page columns for an update */ + BTR_STORE_UPDATE +}; + +/*******************************************************************//** +Determine if an operation on off-page columns is an update. +@return TRUE if op != BTR_STORE_INSERT */ +UNIV_INLINE +ibool +btr_blob_op_is_update( +/*==================*/ + enum blob_op op) /*!< in: operation */ + __attribute__((warn_unused_result)); + /*******************************************************************//** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. @@ -499,52 +528,23 @@ The fields are stored on pages allocated from leaf node file segment of the index tree. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN -ulint -btr_store_big_rec_extern_fields_func( -/*=================================*/ +enum db_err +btr_store_big_rec_extern_fields( +/*============================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ - rec_t* rec, /*!< in: record */ + rec_t* rec, /*!< in/out: record */ const ulint* offsets, /*!< in: rec_get_offsets(rec, index); the "external storage" flags in offsets will not correspond to rec when this function returns */ -#ifdef UNIV_DEBUG - mtr_t* local_mtr, /*!< in: mtr containing the - latch to rec and to the tree */ -#endif /* UNIV_DEBUG */ -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ibool update_in_place,/*! in: TRUE if the record is updated - in place (not delete+insert) */ -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - const big_rec_t*big_rec_vec) /*!< in: vector containing fields + const big_rec_t*big_rec_vec, /*!< in: vector containing fields to be stored externally */ - __attribute__((nonnull)); - -/** Stores the fields in big_rec_vec to the tablespace and puts pointers to -them in rec. The extern flags in rec will have to be set beforehand. -The fields are stored on pages allocated from leaf node -file segment of the index tree. 
-@param index in: clustered index; MUST be X-latched by mtr -@param b in/out: block containing rec; MUST be X-latched by mtr -@param rec in/out: clustered index record -@param offsets in: rec_get_offsets(rec, index); - the "external storage" flags in offsets will not be adjusted -@param mtr in: mini-transaction that holds x-latch on index and b -@param upd in: TRUE if the record is updated in place (not delete+insert) -@param big in: vector containing fields to be stored externally -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -#ifdef UNIV_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) -#elif defined UNIV_BLOB_LIGHT_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) -#else -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) -#endif + mtr_t* btr_mtr, /*!< in: mtr containing the + latches to the clustered index */ + enum blob_op op) /*! in: operation code */ + __attribute__((nonnull, warn_unused_result)); /*******************************************************************//** Frees the space in an externally stored field to the file space diff --git a/include/btr0cur.ic b/include/btr0cur.ic index 280583f6ccf..e31f77c77eb 100644 --- a/include/btr0cur.ic +++ b/include/btr0cur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -139,7 +139,7 @@ btr_cur_compress_recommendation( btr_cur_t* cursor, /*!< in: btr cursor */ mtr_t* mtr) /*!< in: mtr */ { - page_t* page; + const page_t* page; ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); @@ -197,4 +197,25 @@ btr_cur_can_delete_without_compress( return(TRUE); } + +/*******************************************************************//** +Determine if an operation on off-page columns is an update. +@return TRUE if op != BTR_STORE_INSERT */ +UNIV_INLINE +ibool +btr_blob_op_is_update( +/*==================*/ + enum blob_op op) /*!< in: operation */ +{ + switch (op) { + case BTR_STORE_INSERT: + return(FALSE); + case BTR_STORE_INSERT_UPDATE: + case BTR_STORE_UPDATE: + return(TRUE); + } + + ut_ad(0); + return(FALSE); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/include/btr0pcur.h b/include/btr0pcur.h index 140f94466db..2ebd70a6f23 100644 --- a/include/btr0pcur.h +++ b/include/btr0pcur.h @@ -53,6 +53,16 @@ UNIV_INTERN btr_pcur_t* btr_pcur_create_for_mysql(void); /*============================*/ + +/**************************************************************//** +Resets a persistent cursor object, freeing ::old_rec_buf if it is +allocated and resetting the other members to their initial values. 
*/ +UNIV_INTERN +void +btr_pcur_reset( +/*===========*/ + btr_pcur_t* cursor);/*!< in, out: persistent cursor */ + /**************************************************************//** Frees the memory for a persistent cursor object. */ UNIV_INTERN diff --git a/include/buf0buf.h b/include/buf0buf.h index 0ea74fb9eb2..7502942d681 100644 --- a/include/buf0buf.h +++ b/include/buf0buf.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -611,6 +611,31 @@ buf_block_get_modify_clock( #else /* !UNIV_HOTBACKUP */ # define buf_block_modify_clock_inc(block) ((void) 0) #endif /* !UNIV_HOTBACKUP */ +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_buf_fix_inc_func( +/*=======================*/ +#ifdef UNIV_SYNC_DEBUG + const char* file, /*!< in: file name */ + ulint line, /*!< in: line */ +#endif /* UNIV_SYNC_DEBUG */ + buf_block_t* block) /*!< in/out: block to bufferfix */ + __attribute__((nonnull)); +#ifdef UNIV_SYNC_DEBUG +/** Increments the bufferfix count. 
+@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) +#else /* UNIV_SYNC_DEBUG */ +/** Increments the bufferfix count. +@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) +#endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value @@ -700,6 +725,13 @@ buf_print(void); /*============*/ #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* !UNIV_HOTBACKUP */ +enum buf_page_print_flags { + /** Do not crash at the end of buf_page_print(). */ + BUF_PAGE_PRINT_NO_CRASH = 1, + /** Do not print the full page dump. */ + BUF_PAGE_PRINT_NO_FULL = 2 +}; + /********************************************************************//** Prints a page to stderr. */ UNIV_INTERN @@ -707,8 +739,12 @@ void buf_page_print( /*===========*/ const byte* read_buf, /*!< in: a database page */ - ulint zip_size); /*!< in: compressed page size, or + ulint zip_size, /*!< in: compressed page size, or 0 for uncompressed pages */ + ulint flags) /*!< in: 0 or + BUF_PAGE_PRINT_NO_CRASH or + BUF_PAGE_PRINT_NO_FULL */ + __attribute__((nonnull)); /********************************************************************//** Decompress a block. @return TRUE if successful */ @@ -773,11 +809,11 @@ buf_all_freed(void); /*********************************************************************//** Checks that there currently are no pending i/o-operations for the buffer pool. 
-@return TRUE if there is no pending i/o */ +@return number of pending i/o operations */ UNIV_INTERN -ibool -buf_pool_check_no_pending_io(void); -/*==============================*/ +ulint +buf_pool_check_num_pending_io(void); +/*===============================*/ /*********************************************************************//** Invalidates the file pages in the buffer pool when an archive recovery is completed. All the file pages buffered must be in a replaceable state when diff --git a/include/buf0buf.ic b/include/buf0buf.ic index 55d89b66375..66006d366c6 100644 --- a/include/buf0buf.ic +++ b/include/buf0buf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -1026,19 +1026,6 @@ buf_block_buf_fix_inc_func( block->page.buf_fix_count++; } -#ifdef UNIV_SYNC_DEBUG -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) -#else /* UNIV_SYNC_DEBUG */ -/** Increments the bufferfix count. 
-@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) -#endif /* UNIV_SYNC_DEBUG */ /*******************************************************************//** Decrements the bufferfix count. */ @@ -1295,7 +1282,7 @@ buf_block_dbg_add_level( where we have acquired latch */ ulint level) /*!< in: latching order level */ { - sync_thread_add_level(&block->lock, level); + sync_thread_add_level(&block->lock, level, FALSE); } #endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** diff --git a/include/buf0lru.h b/include/buf0lru.h index 700136ec488..c3672a65ed7 100644 --- a/include/buf0lru.h +++ b/include/buf0lru.h @@ -64,15 +64,14 @@ These are low-level functions #define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b)) /******************************************************************//** -Invalidates all pages belonging to a given tablespace when we are deleting -the data file(s) of that tablespace. A PROBLEM: if readahead is being started, -what guarantees that it will not try to read in pages after this operation has -completed? */ +Removes all pages belonging to a given tablespace. */ UNIV_INTERN void -buf_LRU_invalidate_tablespace( +buf_LRU_flush_or_remove_pages( /*==========================*/ - ulint id); /*!< in: space id */ + ulint id, /*!< in: space id */ + enum buf_remove_t buf_remove);/*!< in: remove or flush + strategy */ /******************************************************************//** */ diff --git a/include/buf0types.h b/include/buf0types.h index d140936a886..a6e947dc21a 100644 --- a/include/buf0types.h +++ b/include/buf0types.h @@ -63,6 +63,15 @@ enum buf_io_fix { the flush_list */ }; +/** Algorithm to remove the pages for a tablespace from the buffer pool. +@See buf_LRU_flush_or_remove_pages(). 
*/ +enum buf_remove_t { + BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer + pool, don't write or sync to disk */ + BUF_REMOVE_FLUSH_NO_WRITE /*!< Remove only, from the flush list, + don't write or sync to disk */ +}; + /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ /* @{ */ #define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT diff --git a/include/data0data.h b/include/data0data.h index f7bdd29ed90..6d3c2988fdc 100644 --- a/include/data0data.h +++ b/include/data0data.h @@ -231,6 +231,26 @@ dtuple_set_n_fields_cmp( dtuple_t* tuple, /*!< in: tuple */ ulint n_fields_cmp); /*!< in: number of fields used in comparisons in rem0cmp.* */ + +/* Estimate the number of bytes that are going to be allocated when +creating a new dtuple_t object */ +#define DTUPLE_EST_ALLOC(n_fields) \ + (sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t)) + +/**********************************************************//** +Creates a data tuple from an already allocated chunk of memory. +The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). +The default value for number of fields used in record comparisons +for this tuple is n_fields. +@return created tuple (inside buf) */ +UNIV_INLINE +dtuple_t* +dtuple_create_from_mem( +/*===================*/ + void* buf, /*!< in, out: buffer to use */ + ulint buf_size, /*!< in: buffer size */ + ulint n_fields); /*!< in: number of fields */ + /**********************************************************//** Creates a data tuple to a memory heap. The default value for number of fields used in record comparisons for this tuple is n_fields. 
@@ -240,7 +260,8 @@ dtuple_t* dtuple_create( /*==========*/ mem_heap_t* heap, /*!< in: memory heap where the tuple - is created */ + is created, DTUPLE_EST_ALLOC(n_fields) + bytes will be allocated from this heap */ ulint n_fields); /*!< in: number of fields */ /**********************************************************//** diff --git a/include/data0data.ic b/include/data0data.ic index 5c0f8039c80..205fa397987 100644 --- a/include/data0data.ic +++ b/include/data0data.ic @@ -348,23 +348,25 @@ dtuple_get_nth_field( #endif /* UNIV_DEBUG */ /**********************************************************//** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ +Creates a data tuple from an already allocated chunk of memory. +The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). +The default value for number of fields used in record comparisons +for this tuple is n_fields. +@return created tuple (inside buf) */ UNIV_INLINE dtuple_t* -dtuple_create( -/*==========*/ - mem_heap_t* heap, /*!< in: memory heap where the tuple - is created */ - ulint n_fields) /*!< in: number of fields */ +dtuple_create_from_mem( +/*===================*/ + void* buf, /*!< in, out: buffer to use */ + ulint buf_size, /*!< in: buffer size */ + ulint n_fields) /*!< in: number of fields */ { dtuple_t* tuple; - ut_ad(heap); + ut_ad(buf != NULL); + ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields)); - tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t) - + n_fields * sizeof(dfield_t)); + tuple = (dtuple_t*) buf; tuple->info_bits = 0; tuple->n_fields = n_fields; tuple->n_fields_cmp = n_fields; @@ -386,9 +388,38 @@ dtuple_create( dfield_get_type(field)->mtype = DATA_ERROR; } } +#endif + return(tuple); +} +/**********************************************************//** +Creates a data tuple to a memory heap. 
The default value for number +of fields used in record comparisons for this tuple is n_fields. +@return own: created tuple */ +UNIV_INLINE +dtuple_t* +dtuple_create( +/*==========*/ + mem_heap_t* heap, /*!< in: memory heap where the tuple + is created, DTUPLE_EST_ALLOC(n_fields) + bytes will be allocated from this heap */ + ulint n_fields) /*!< in: number of fields */ +{ + void* buf; + ulint buf_size; + dtuple_t* tuple; + + ut_ad(heap); + + buf_size = DTUPLE_EST_ALLOC(n_fields); + buf = mem_heap_alloc(heap, buf_size); + + tuple = dtuple_create_from_mem(buf, buf_size, n_fields); + +#ifdef UNIV_DEBUG UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields); #endif + return(tuple); } diff --git a/include/db0err.h b/include/db0err.h index e0952f0709d..95ccef16be0 100644 --- a/include/db0err.h +++ b/include/db0err.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -112,6 +112,8 @@ enum db_err { limit */ DB_INDEX_CORRUPT, /* we have corrupted index */ DB_UNDO_RECORD_TOO_BIG, /* the undo log record is too big */ + DB_TABLE_IN_FK_CHECK, /* table is being used in foreign + key check */ /* The following are partial failure codes */ DB_FAIL = 1000, diff --git a/include/dict0boot.h b/include/dict0boot.h index a817775c93c..9905217ccf7 100644 --- a/include/dict0boot.h +++ b/include/dict0boot.h @@ -94,6 +94,26 @@ void dict_create(void); /*=============*/ +/*****************************************************************//** +Verifies the SYS_STATS table by scanning its clustered index. This +function may only be called at InnoDB startup time. + +@return TRUE if SYS_STATS was verified successfully */ +UNIV_INTERN +ibool +dict_verify_xtradb_sys_stats(void); +/*==============================*/ + +/*****************************************************************//** +Discard the existing dictionary cache SYS_STATS information, create and +add it there anew. Does not touch the old SYS_STATS tablespace page +under the assumption that they are corrupted or overwritten for other +purposes. 
*/ +UNIV_INTERN +void +dict_recreate_xtradb_sys_stats(void); +/*================================*/ + /* Space id and page no where the dictionary header resides */ #define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ diff --git a/include/dict0dict.h b/include/dict0dict.h index fb5285fae0b..6ce1aed17b7 100644 --- a/include/dict0dict.h +++ b/include/dict0dict.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -1087,14 +1087,6 @@ dict_index_get_page( /*================*/ const dict_index_t* tree); /*!< in: index */ /*********************************************************************//** -Sets the page number of the root of index tree. */ -UNIV_INLINE -void -dict_index_set_page( -/*================*/ - dict_index_t* index, /*!< in/out: index */ - ulint page); /*!< in: page number */ -/*********************************************************************//** Gets the read-write lock of the index tree. 
@return read-write lock */ UNIV_INLINE diff --git a/include/dict0dict.ic b/include/dict0dict.ic index ecb5ec0a408..1c09722e8d7 100644 --- a/include/dict0dict.ic +++ b/include/dict0dict.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -761,21 +761,6 @@ dict_index_get_page( return(index->page); } -/*********************************************************************//** -Sets the page number of the root of index tree. */ -UNIV_INLINE -void -dict_index_set_page( -/*================*/ - dict_index_t* index, /*!< in/out: index */ - ulint page) /*!< in: page number */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->page = page; -} - /*********************************************************************//** Gets the read-write lock of the index tree. 
@return read-write lock */ diff --git a/include/dict0mem.h b/include/dict0mem.h index 4701fcd87f9..54593a0b9c7 100644 --- a/include/dict0mem.h +++ b/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -396,7 +396,9 @@ struct dict_index_struct{ unsigned to_be_dropped:1; /*!< TRUE if this index is marked to be dropped in ha_innobase::prepare_drop_index(), - otherwise FALSE */ + otherwise FALSE. Protected by + dict_sys->mutex, dict_operation_lock and + index->lock.*/ dict_field_t* fields; /*!< array of field descriptions */ #ifndef UNIV_HOTBACKUP UT_LIST_NODE_T(dict_index_t) diff --git a/include/fil0fil.h b/include/fil0fil.h index 92fdca0db1b..19bf5960ae4 100644 --- a/include/fil0fil.h +++ b/include/fil0fil.h @@ -348,20 +348,19 @@ fil_read_first_page( ib_uint64_t* max_flushed_lsn); /*!< out: max of flushed lsn values in data files */ /*******************************************************************//** -Increments the count of pending insert buffer page merges, if space is not -being deleted. 
-@return TRUE if being deleted, and ibuf merges should be skipped */ +Increments the count of pending operations, if space is not being deleted. +@return TRUE if being deleted, and operation should be skipped */ UNIV_INTERN ibool -fil_inc_pending_ibuf_merges( -/*========================*/ +fil_inc_pending_ops( +/*================*/ ulint id); /*!< in: space id */ /*******************************************************************//** -Decrements the count of pending insert buffer page merges. */ +Decrements the count of pending operations. */ UNIV_INTERN void -fil_decr_pending_ibuf_merges( -/*=========================*/ +fil_decr_pending_ops( +/*=================*/ ulint id); /*!< in: space id */ #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** @@ -400,7 +399,9 @@ UNIV_INTERN ibool fil_delete_tablespace( /*==================*/ - ulint id); /*!< in: space id */ + ulint id, /*!< in: space id */ + ibool evict_all); /*!< in: TRUE if we want all pages + evicted from LRU. */ #ifndef UNIV_HOTBACKUP /*******************************************************************//** Discards a single-table tablespace. The tablespace must be cached in the diff --git a/include/fsp0fsp.h b/include/fsp0fsp.h index d5e235aa870..f07e3decc66 100644 --- a/include/fsp0fsp.h +++ b/include/fsp0fsp.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -260,30 +260,33 @@ fseg_n_reserved_pages( Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation.
-@return the allocated page offset FIL_NULL if no page could be allocated */ -UNIV_INTERN -ulint -fseg_alloc_free_page( -/*=================*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ - byte direction, /*!< in: if the new page is needed because +@param[in/out] seg_header segment header +@param[in] hint hint of which page would be desirable +@param[in] direction if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /*!< in: mtr handle */ + FSP_UP, FSP_NO_DIR +@param[in/out] mtr mini-transaction +@return X-latched block, or NULL if no page could be allocated */ +#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ + fseg_alloc_free_page_general(seg_header, hint, direction, \ + FALSE, mtr, mtr) /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. 
-@return allocated page offset, FIL_NULL if no page could be allocated */ +@retval NULL if no page could be allocated +@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +(init_mtr == mtr, or the page was not previously freed in mtr) +@retval block (not allocated or initialized) otherwise */ UNIV_INTERN -ulint +buf_block_t* fseg_alloc_free_page_general( /*=========================*/ - fseg_header_t* seg_header,/*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ + fseg_header_t* seg_header,/*!< in/out: segment header */ + ulint hint, /*!< in: hint of which page would be + desirable */ byte direction,/*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which @@ -294,7 +297,12 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page should be initialized. + If init_mtr!=mtr, but the page is already + latched in mtr, do not initialize the page. */ + __attribute__((warn_unused_result, nonnull)); /**********************************************************************//** Reserves free pages from a tablespace. All mini-transactions which may use several pages from the tablespace should call this function beforehand diff --git a/include/log0log.h b/include/log0log.h index e0bffe6f725..c39c7b0c126 100644 --- a/include/log0log.h +++ b/include/log0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/include/mem0mem.ic b/include/mem0mem.ic index d214c3fe6c9..c70615e1ca9 100644 --- a/include/mem0mem.ic +++ b/include/mem0mem.ic @@ -208,10 +208,6 @@ mem_heap_alloc( caller */ buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; -#endif -#ifdef UNIV_SET_MEM_TO_ZERO - UNIV_MEM_ALLOC(buf, n); - memset(buf, '\0', n); #endif UNIV_MEM_ALLOC(buf, n); return(buf); diff --git a/include/mtr0log.ic b/include/mtr0log.ic index fa9ac014f4e..3ed1183f5c0 100644 --- a/include/mtr0log.ic +++ b/include/mtr0log.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -217,6 +217,7 @@ mlog_write_initial_log_record_fast( "Please post a bug report to " "bugs.mysql.com.\n", type, offset, space); + ut_ad(0); } } diff --git a/include/mtr0mtr.h b/include/mtr0mtr.h index 7f608546cc2..46f1ff9310c 100644 --- a/include/mtr0mtr.h +++ b/include/mtr0mtr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -50,7 +50,9 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ #define MTR_MEMO_PAGE_S_FIX RW_S_LATCH #define MTR_MEMO_PAGE_X_FIX RW_X_LATCH #define MTR_MEMO_BUF_FIX RW_NO_LATCH -#define MTR_MEMO_MODIFY 54 +#ifdef UNIV_DEBUG +# define MTR_MEMO_MODIFY 54 +#endif /* UNIV_DEBUG */ #define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_X_LOCK 56 @@ -376,6 +378,9 @@ struct mtr_struct{ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ + ulint n_freed_pages; + /* number of pages that have been freed in + this mini-transaction */ ulint log_mode; /* specifies which operations should be logged; default value MTR_LOG_ALL */ ib_uint64_t start_lsn;/* start lsn of the possible log entry for diff --git a/include/mtr0mtr.ic b/include/mtr0mtr.ic index 1db4a4bd735..a03a0271535 100644 --- a/include/mtr0mtr.ic +++ b/include/mtr0mtr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -46,6 +46,7 @@ mtr_start( mtr->modifications = FALSE; mtr->inside_ibuf = FALSE; mtr->n_log_recs = 0; + mtr->n_freed_pages = 0; ut_d(mtr->state = MTR_ACTIVE); ut_d(mtr->magic_n = MTR_MAGIC_N); @@ -248,7 +249,7 @@ mtr_s_lock_func( ut_ad(mtr); ut_ad(lock); - rw_lock_s_lock_func(lock, 0, file, line); + rw_lock_s_lock_inline(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); } @@ -267,7 +268,7 @@ mtr_x_lock_func( ut_ad(mtr); ut_ad(lock); - rw_lock_x_lock_func(lock, 0, file, line); + rw_lock_x_lock_inline(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); } diff --git a/include/page0page.h b/include/page0page.h index 616939586e0..b4755dce61d 100644 --- a/include/page0page.h +++ b/include/page0page.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -281,16 +281,42 @@ page_get_supremum_offset( const page_t* page); /*!< in: page which must have record(s) */ #define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page)) #define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page)) + /************************************************************//** -Returns the middle record of record list. If there are an even number -of records in the list, returns the first record of upper half-list. -@return middle record */ +Returns the nth record of the record list. +This is the inverse function of page_rec_get_n_recs_before(). +@return nth record */ UNIV_INTERN +const rec_t* +page_rec_get_nth_const( +/*===================*/ + const page_t* page, /*!< in: page */ + ulint nth) /*!< in: nth record */ + __attribute__((nonnull, warn_unused_result)); +/************************************************************//** +Returns the nth record of the record list. +This is the inverse function of page_rec_get_n_recs_before(). +@return nth record */ +UNIV_INLINE +rec_t* +page_rec_get_nth( +/*=============*/ + page_t* page, /*!< in: page */ + ulint nth) /*!< in: nth record */ + __attribute__((nonnull, warn_unused_result)); + +#ifndef UNIV_HOTBACKUP +/************************************************************//** +Returns the middle record of the records on the page. If there is an +even number of records in the list, returns the first record of the +upper half-list.
+@return middle record */ +UNIV_INLINE rec_t* page_get_middle_rec( /*================*/ - page_t* page); /*!< in: page */ -#ifndef UNIV_HOTBACKUP + page_t* page) /*!< in: page */ + __attribute__((nonnull, warn_unused_result)); /*************************************************************//** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an @@ -345,6 +371,7 @@ page_get_n_recs( /***************************************************************//** Returns the number of records before the given record in chain. The number includes infimum and supremum records. +This is the inverse function of page_rec_get_nth(). @return number of records */ UNIV_INTERN ulint diff --git a/include/page0page.ic b/include/page0page.ic index 1bac2fb53fb..c1b99b8f187 100644 --- a/include/page0page.ic +++ b/include/page0page.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -422,7 +422,37 @@ page_rec_is_infimum( return(page_rec_is_infimum_low(page_offset(rec))); } +/************************************************************//** +Returns the nth record of the record list. +This is the inverse function of page_rec_get_n_recs_before(). +@return nth record */ +UNIV_INLINE +rec_t* +page_rec_get_nth( +/*=============*/ + page_t* page, /*!< in: page */ + ulint nth) /*!< in: nth record */ +{ + return((rec_t*) page_rec_get_nth_const(page, nth)); +} + #ifndef UNIV_HOTBACKUP +/************************************************************//** +Returns the middle record of the records on the page. If there is an +even number of records in the list, returns the first record of the +upper half-list. +@return middle record */ +UNIV_INLINE +rec_t* +page_get_middle_rec( +/*================*/ + page_t* page) /*!< in: page */ +{ + ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; + + return(page_rec_get_nth(page, middle)); +} + /*************************************************************//** Compares a data tuple to a physical record. 
Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an @@ -713,7 +743,7 @@ page_rec_get_next_low( (void*) rec, (ulong) page_get_space_id(page), (ulong) page_get_page_no(page)); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); ut_error; } diff --git a/include/row0mysql.h b/include/row0mysql.h index 652adea167f..5b1d130d480 100644 --- a/include/row0mysql.h +++ b/include/row0mysql.h @@ -168,7 +168,9 @@ UNIV_INTERN row_prebuilt_t* row_create_prebuilt( /*================*/ - dict_table_t* table); /*!< in: Innobase table handle */ + dict_table_t* table, /*!< in: Innobase table handle */ + ulint mysql_row_len); /*!< in: length in bytes of a row in + the MySQL format */ /********************************************************************//** Free a prebuilt struct for a MySQL table handle. */ UNIV_INTERN @@ -688,9 +690,9 @@ struct row_prebuilt_struct { in inserts */ que_fork_t* upd_graph; /*!< Innobase SQL query graph used in updates or deletes */ - btr_pcur_t* pcur; /*!< persistent cursor used in selects + btr_pcur_t pcur; /*!< persistent cursor used in selects and updates */ - btr_pcur_t* clust_pcur; /*!< persistent cursor used in + btr_pcur_t clust_pcur; /*!< persistent cursor used in some selects and updates */ que_fork_t* sel_graph; /*!< dummy query graph used in selects */ diff --git a/include/row0sel.h b/include/row0sel.h index 8544b9d08ba..1c4ea6f7244 100644 --- a/include/row0sel.h +++ b/include/row0sel.h @@ -128,7 +128,12 @@ row_sel_convert_mysql_key_to_innobase( in the tuple is already according to index! */ byte* buf, /*!< in: buffer to use in field - conversions */ + conversions; NOTE that dtuple->data + may end up pointing inside buf so + do not discard that buffer while + the tuple is being used. 
See + row_mysql_store_col_in_innobase_format() + in the case of DATA_INT */ ulint buf_len, /*!< in: buffer length */ dict_index_t* index, /*!< in: index of the key value */ const byte* key_ptr, /*!< in: MySQL key value */ diff --git a/include/srv0srv.h b/include/srv0srv.h index 248e1efff00..9e860ef285c 100644 --- a/include/srv0srv.h +++ b/include/srv0srv.h @@ -226,6 +226,9 @@ extern unsigned long long srv_stats_sample_pages; extern ulint srv_stats_auto_update; extern ulint srv_stats_update_need_lock; extern ibool srv_use_sys_stats_table; +#ifdef UNIV_DEBUG +extern ulong srv_sys_stats_root_page; +#endif extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; @@ -285,9 +288,6 @@ extern ibool srv_print_lock_waits; extern ibool srv_print_buf_io; extern ibool srv_print_log_io; extern ibool srv_print_latch_waits; - -extern ulint srv_flush_checkpoint_debug; - #else /* UNIV_DEBUG */ # define srv_print_thread_releases FALSE # define srv_print_lock_waits FALSE @@ -734,12 +734,14 @@ srv_que_task_enqueue_low( que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** -Check whether any background thread is active. -@return FALSE if all are are suspended or have exited. */ +Check whether any background thread is active. If so, return the thread +type. +@return ULINT_UNDEFINED if all are are suspended or have exited, thread +type if any are still active. */ UNIV_INTERN -ibool -srv_is_any_background_thread_active(void); -/*======================================*/ +ulint +srv_get_active_thread_type(void); +/*============================*/ /** Status variables to be passed to MySQL */ struct export_var_struct{ diff --git a/include/sync0rw.h b/include/sync0rw.h index 971099c91f5..6159d8d0c81 100644 --- a/include/sync0rw.h +++ b/include/sync0rw.h @@ -155,6 +155,9 @@ unlocking, not the corresponding function. 
*/ # define rw_lock_s_lock(M) \ rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) +# define rw_lock_s_lock_inline(M, P, F, L) \ + rw_lock_s_lock_func((M), (P), (F), (L)) + # define rw_lock_s_lock_gen(M, P) \ rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) @@ -171,12 +174,18 @@ unlocking, not the corresponding function. */ # define rw_lock_x_lock(M) \ rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) +# define rw_lock_x_lock_inline(M, P, F, L) \ + rw_lock_x_lock_func((M), (P), (F), (L)) + # define rw_lock_x_lock_gen(M, P) \ rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) # define rw_lock_x_lock_nowait(M) \ rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) +# define rw_lock_x_lock_func_nowait_inline(M, F, L) \ + rw_lock_x_lock_func_nowait((M), (F), (L)) + # ifdef UNIV_SYNC_DEBUG # define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L) # else @@ -208,6 +217,9 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock(M) \ pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) +# define rw_lock_s_lock_inline(M, P, F, L) \ + pfs_rw_lock_s_lock_func((M), (P), (F), (L)) + # define rw_lock_s_lock_gen(M, P) \ pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) @@ -223,12 +235,18 @@ unlocking, not the corresponding function. 
*/ # define rw_lock_x_lock(M) \ pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) +# define rw_lock_x_lock_inline(M, P, F, L) \ + pfs_rw_lock_x_lock_func((M), (P), (F), (L)) + # define rw_lock_x_lock_gen(M, P) \ pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) # define rw_lock_x_lock_nowait(M) \ pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) +# define rw_lock_x_lock_func_nowait_inline(M, F, L) \ + pfs_rw_lock_x_lock_func_nowait((M), (F), (L)) + # ifdef UNIV_SYNC_DEBUG # define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L) # else diff --git a/include/sync0rw.ic b/include/sync0rw.ic index 3eaa6172631..73e1f880aad 100644 --- a/include/sync0rw.ic +++ b/include/sync0rw.ic @@ -90,7 +90,7 @@ rw_lock_set_waiter_flag( rw_lock_t* lock) /*!< in/out: rw-lock */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - os_compare_and_swap_ulint(&lock->waiters, 0, 1); + (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 1; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -107,7 +107,7 @@ rw_lock_reset_waiter_flag( rw_lock_t* lock) /*!< in/out: rw-lock */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - os_compare_and_swap_ulint(&lock->waiters, 1, 0); + (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 0; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -564,8 +564,6 @@ rw_lock_x_unlock_func( if (lock->lock_word == 0) { /* Last caller in a possible recursive chain. 
*/ lock->recursive = FALSE; - UNIV_MEM_INVALID(&lock->writer_thread, - sizeof lock->writer_thread); } #ifdef UNIV_SYNC_DEBUG @@ -610,8 +608,6 @@ rw_lock_x_unlock_direct( if (lock->lock_word == 0) { lock->recursive = FALSE; - UNIV_MEM_INVALID(&lock->writer_thread, - sizeof lock->writer_thread); } lock->lock_word += X_LOCK_DECR; diff --git a/include/sync0sync.h b/include/sync0sync.h index 1437e797f6f..33c95a94804 100644 --- a/include/sync0sync.h +++ b/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -404,8 +404,10 @@ void sync_thread_add_level( /*==================*/ void* latch, /*!< in: pointer to a mutex or an rw-lock */ - ulint level); /*!< in: level in the latching order; if + ulint level, /*!< in: level in the latching order; if SYNC_LEVEL_VARYING, nothing is done */ + ibool relock) /*!< in: TRUE if re-entering an x-lock */ + __attribute__((nonnull)); /******************************************************************//** Removes a latch from the thread level array if it is found there. 
@return TRUE if found in the array; it is no error if the latch is diff --git a/include/trx0purge.h b/include/trx0purge.h index 0b83a76cab7..2bd9e64476b 100644 --- a/include/trx0purge.h +++ b/include/trx0purge.h @@ -143,9 +143,9 @@ struct trx_purge_struct{ obtaining an s-latch here. */ read_view_t* view; /*!< The purge will not remove undo logs which are >= this view (purge view) */ - ulint n_pages_handled;/*!< Approximate number of undo log + ulonglong n_pages_handled;/*!< Approximate number of undo log pages processed in purge */ - ulint handle_limit; /*!< Target of how many pages to get + ulonglong handle_limit; /*!< Target of how many pages to get processed in the current purge */ /*------------------------------*/ /* The following two fields form the 'purge pointer' which advances diff --git a/include/trx0rec.ic b/include/trx0rec.ic index f0b3276ed44..4fc5a7147f9 100644 --- a/include/trx0rec.ic +++ b/include/trx0rec.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -107,6 +107,7 @@ trx_undo_rec_copy( len = mach_read_from_2(undo_rec) - ut_align_offset(undo_rec, UNIV_PAGE_SIZE); + ut_ad(len < UNIV_PAGE_SIZE); return(mem_heap_dup(heap, undo_rec, len)); } #endif /* !UNIV_HOTBACKUP */ diff --git a/include/trx0rseg.ic b/include/trx0rseg.ic index daffa92fc7d..5e8d2b41120 100644 --- a/include/trx0rseg.ic +++ b/include/trx0rseg.ic @@ -25,6 +25,7 @@ Created 3/26/1996 Heikki Tuuri #include "srv0srv.h" #include "mtr0log.h" +#include "trx0sys.h" /******************************************************************//** Gets a rollback segment header. @@ -131,7 +132,13 @@ trx_rsegf_undo_find_free( ulint i; ulint page_no; - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { + for (i = 0; +#ifndef UNIV_DEBUG + i < TRX_RSEG_N_SLOTS; +#else + i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS); +#endif + i++) { page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr); diff --git a/include/trx0sys.h b/include/trx0sys.h index 495ce0e1184..976cb31563f 100644 --- a/include/trx0sys.h +++ b/include/trx0sys.h @@ -249,6 +249,12 @@ trx_id_t trx_sys_get_new_trx_id(void); /*========================*/ #endif /* !UNIV_HOTBACKUP */ + +#ifdef UNIV_DEBUG +/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */ +extern uint trx_rseg_n_slots_debug; +#endif + /*****************************************************************//** Writes a trx id to an index page. 
In case that the id size changes in some future version, this function should be used instead of diff --git a/include/trx0undo.h b/include/trx0undo.h index 50aa6d0ac09..4a1e40af505 100644 --- a/include/trx0undo.h +++ b/include/trx0undo.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -194,16 +194,17 @@ trx_undo_get_first_rec( mtr_t* mtr); /*!< in: mtr */ /********************************************************************//** Tries to add a page to the undo log segment where the undo log is placed. 
-@return page number if success, else FIL_NULL */ +@return X-latched block if success, else NULL */ UNIV_INTERN -ulint +buf_block_t* trx_undo_add_page( /*==============*/ trx_t* trx, /*!< in: transaction */ trx_undo_t* undo, /*!< in: undo log memory object */ - mtr_t* mtr); /*!< in: mtr which does not have a latch to any + mtr_t* mtr) /*!< in: mtr which does not have a latch to any undo log page; the caller must have reserved the rollback segment mutex */ + __attribute__((nonnull, warn_unused_result)); /********************************************************************//** Frees the last undo log page. The caller must hold the rollback segment mutex. */ diff --git a/include/univ.i b/include/univ.i index 195c09c8163..003f61ceb95 100644 --- a/include/univ.i +++ b/include/univ.i @@ -160,14 +160,6 @@ resolved */ /* DEBUG VERSION CONTROL ===================== */ -/* The following flag will make InnoDB to initialize -all memory it allocates to zero. It hides Purify -warnings about reading unallocated memory unless -memory is read outside the allocated blocks. */ -/* -#define UNIV_INIT_MEM_TO_ZERO -*/ - /* When this macro is defined then additional test functions will be compiled. These functions live at the end of each relevant source file and have "test_" prefix. These functions are not called from anywhere in @@ -237,15 +229,6 @@ operations (very slow); also UNIV_DEBUG must be defined */ #define UNIV_BTR_DEBUG /* check B-tree links */ #define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ -#ifdef HAVE_purify -/* The following sets all new allocated memory to zero before use: -this can be used to eliminate unnecessary Purify warnings, but note that -it also masks many bugs Purify could detect. For detailed Purify analysis it -is best to remove the define below and look through the warnings one -by one. 
*/ -#define UNIV_SET_MEM_TO_ZERO -#endif - /* #define UNIV_SQL_DEBUG #define UNIV_LOG_DEBUG @@ -330,11 +313,17 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 -/* The maximum length of a table name. This is the MySQL limit and is -defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the -number does not include a terminating '\0'. InnoDB probably can handle -longer names internally */ -#define MAX_TABLE_NAME_LEN 192 +/** This is the "mbmaxlen" for my_charset_filename (defined in +strings/ctype-utf8.c), which is used to encode File and Database names. */ +#define FILENAME_CHARSET_MAXNAMLEN 5 + +/** The maximum length of an encoded table name in bytes. The max +table and database names are NAME_CHAR_LEN (64) characters. After the +encoding, the max length would be NAME_CHAR_LEN (64) * +FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a +terminating '\0'. InnoDB can handle longer names internally */ +#define MAX_TABLE_NAME_LEN 320 + /* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is the MySQL's NAME_LEN, see check_and_convert_db_name(). */ diff --git a/include/ut0mem.h b/include/ut0mem.h index faf6f242883..39f5f20dc6d 100644 --- a/include/ut0mem.h +++ b/include/ut0mem.h @@ -78,40 +78,19 @@ ut_mem_init(void); /*=============*/ /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. +Allocates memory.
@return own: allocated memory */ UNIV_INTERN void* ut_malloc_low( /*==========*/ ulint n, /*!< in: number of bytes to allocate */ - ibool set_to_zero, /*!< in: TRUE if allocated memory - should be set to zero if - UNIV_SET_MEM_TO_ZERO is defined */ - ibool assert_on_error); /*!< in: if TRUE, we crash mysqld if + ibool assert_on_error) /*!< in: if TRUE, we crash mysqld if the memory cannot be allocated */ + __attribute__((malloc)); /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc( -/*======*/ - ulint n); /*!< in: number of bytes to allocate */ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. Prints to -stderr a message if fails. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -ut_test_malloc( -/*===========*/ - ulint n); /*!< in: try to allocate this many bytes */ -#endif /* !UNIV_HOTBACKUP */ +Allocates memory. */ +#define ut_malloc(n) ut_malloc_low(n, TRUE) /**********************************************************************//** Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is a nop. 
*/ diff --git a/include/ut0rnd.ic b/include/ut0rnd.ic index 2c8c959d804..60e213dd19f 100644 --- a/include/ut0rnd.ic +++ b/include/ut0rnd.ic @@ -114,7 +114,7 @@ ut_rnd_interval( rnd = ut_rnd_gen_ulint(); - return(low + (rnd % (high - low + 1))); + return(low + (rnd % (high - low))); } /*********************************************************//** diff --git a/lock/lock0lock.c b/lock/lock0lock.c index 5fce345d0fe..414d3ae2c49 100644 --- a/lock/lock0lock.c +++ b/lock/lock0lock.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,7 @@ Created 5/7/1996 Heikki Tuuri #include "trx0purge.h" #include "dict0mem.h" #include "trx0sys.h" +#include "btr0btr.h" /* Restricts the length of search we will do in the waits-for graph of transactions */ @@ -1615,7 +1616,7 @@ lock_sec_rec_some_has_impl_off_kernel( if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), rec, index, offsets, TRUE)) { - buf_page_print(page, 0); + buf_page_print(page, 0, 0); /* The page is corrupt: try to avoid a crash by returning NULL */ @@ -1691,7 +1692,7 @@ lock_rec_create( page_no = buf_block_get_page_no(block); page = block->frame; - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); + btr_assert_not_corrupted(block, index); /* If rec is the supremum record, then we reset the gap and LOCK_REC_NOT_GAP bits, as all locks on the supremum are @@ -1799,6 +1800,7 @@ lock_rec_enqueue_waiting( "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); + ut_ad(0); } /* Enqueue the lock request that will wait to be granted */ @@ -3808,6 +3810,7 @@ lock_table_enqueue_waiting( "InnoDB: Submit a detailed bug report" " to 
http://bugs.mysql.com\n", stderr); + ut_ad(0); } /* Enqueue the lock request that will wait to be granted */ @@ -4984,6 +4987,79 @@ function_exit: return(TRUE); } +/*********************************************************************//** +Validate record locks up to a limit. +@return lock at limit or NULL if no more locks in the hash bucket */ +static __attribute__((nonnull, warn_unused_result)) +const lock_t* +lock_rec_validate( +/*==============*/ + ulint start, /*!< in: lock_sys->rec_hash + bucket */ + ib_uint64_t* limit) /*!< in/out: upper limit of + (space, page_no) */ +{ + lock_t* lock; + ut_ad(mutex_own(&kernel_mutex)); + + for (lock = HASH_GET_FIRST(lock_sys->rec_hash, start); + lock != NULL; + lock = HASH_GET_NEXT(hash, lock)) { + + ib_uint64_t current; + + ut_a(trx_in_trx_list(lock->trx)); + ut_a(lock_get_type(lock) == LOCK_REC); + + current = ut_ull_create( + lock->un_member.rec_lock.space, + lock->un_member.rec_lock.page_no); + + if (current > *limit) { + *limit = current + 1; + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Validate a record lock's block */ +static +void +lock_rec_block_validate( +/*====================*/ + ulint space, + ulint page_no) +{ + /* The lock and the block that it is referring to may be freed at + this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. + If the lock exists in lock_rec_validate_page() we assert + !block->page.file_page_was_freed. */ + + mtr_t mtr; + buf_block_t* block; + + /* Make sure that the tablespace is not deleted while we are + trying to access the page. 
*/ + if (!fil_inc_pending_ops(space)) { + mtr_start(&mtr); + block = buf_page_get_gen( + space, fil_space_get_zip_size(space), + page_no, RW_X_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, + __FILE__, __LINE__, &mtr); + + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + + ut_ad(lock_rec_validate_page(block)); + mtr_commit(&mtr); + + fil_decr_pending_ops(space); + } +} + /*********************************************************************//** Validates the lock system. @return TRUE if ok */ @@ -5016,60 +5092,21 @@ lock_validate(void) trx = UT_LIST_GET_NEXT(trx_list, trx); } + /* Iterate over all the record locks and validate the locks. We + don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex. + Release both mutexes during the validation check. */ + for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + const lock_t* lock; + ib_uint64_t limit = 0; - ulint space; - ulint page_no; - ib_uint64_t limit = 0; + while ((lock = lock_rec_validate(i, &limit)) != NULL) { - for (;;) { - mtr_t mtr; - buf_block_t* block; - - lock = HASH_GET_FIRST(lock_sys->rec_hash, i); - - while (lock) { - ib_uint64_t space_page; - ut_a(trx_in_trx_list(lock->trx)); - - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - space_page = ut_ull_create(space, page_no); - - if (space_page >= limit) { - break; - } - - lock = HASH_GET_NEXT(hash, lock); - } - - if (!lock) { - - break; - } + ulint space = lock->un_member.rec_lock.space; + ulint page_no = lock->un_member.rec_lock.page_no; lock_mutex_exit_kernel(); - - /* The lock and the block that it is referring - to may be freed at this point. We pass - BUF_GET_POSSIBLY_FREED to skip a debug check. - If the lock exists in lock_rec_validate_page() - we assert !block->page.file_page_was_freed. 
*/ - - mtr_start(&mtr); - block = buf_page_get_gen( - space, fil_space_get_zip_size(space), - page_no, RW_X_LATCH, NULL, - BUF_GET_POSSIBLY_FREED, - __FILE__, __LINE__, &mtr); - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - - ut_ad(lock_rec_validate_page(block)); - mtr_commit(&mtr); - - limit++; - + lock_rec_block_validate(space, page_no); lock_mutex_enter_kernel(); } } diff --git a/log/log0log.c b/log/log0log.c index 8ba34ef8a83..002de967f82 100644 --- a/log/log0log.c +++ b/log/log0log.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -1691,13 +1691,10 @@ log_preflush_pool_modified_pages( recv_apply_hashed_log_recs(TRUE); } - retry: n_pages = buf_flush_list(ULINT_MAX, new_oldest); - if (sync && n_pages != 0) { - //buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - os_thread_sleep(100000); - goto retry; + if (sync) { + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); } if (n_pages == ULINT_UNDEFINED) { @@ -2019,13 +2016,6 @@ log_checkpoint( { ib_uint64_t oldest_lsn; -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug == 1) { - - return TRUE; - } -#endif - if (recv_recovery_is_on()) { recv_apply_hashed_log_recs(TRUE); } @@ -2117,11 +2107,7 @@ log_make_checkpoint_at( physical write will always be made to log files */ { -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug == 1) - return; -#endif -/* Preflush pages synchronously */ + /* Preflush pages synchronously */ while (!log_preflush_pool_modified_pages(lsn, TRUE)); @@ -2147,13 +2133,7 @@ log_checkpoint_margin(void) ibool checkpoint_sync; ibool do_checkpoint; ibool success; - -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug == 1) - return; -#endif - - loop: +loop: sync = FALSE; checkpoint_sync = FALSE; do_checkpoint = FALSE; @@ -2176,15 +2156,13 @@ log_checkpoint_margin(void) /* A flush is urgent: we have to do a synchronous preflush */ sync = TRUE; - advance = age - log->max_modified_age_sync; + advance = 2 * (age - log->max_modified_age_sync); } else if (age > log_max_modified_age_async()) { /* A flush is not urgent: we do an asynchronous preflush */ advance = age - log_max_modified_age_async(); - log->check_flush_or_checkpoint = FALSE; } else 
{ advance = 0; - log->check_flush_or_checkpoint = FALSE; } checkpoint_age = log->lsn - log->last_checkpoint_lsn; @@ -2201,9 +2179,9 @@ log_checkpoint_margin(void) do_checkpoint = TRUE; - //log->check_flush_or_checkpoint = FALSE; + log->check_flush_or_checkpoint = FALSE; } else { - //log->check_flush_or_checkpoint = FALSE; + log->check_flush_or_checkpoint = FALSE; } mutex_exit(&(log->mutex)); @@ -2211,7 +2189,6 @@ log_checkpoint_margin(void) if (advance) { ib_uint64_t new_oldest = oldest_lsn + advance; -retry: success = log_preflush_pool_modified_pages(new_oldest, sync); /* If the flush succeeded, this thread has done its part @@ -2226,7 +2203,7 @@ retry: log->check_flush_or_checkpoint = TRUE; mutex_exit(&(log->mutex)); - goto retry; + goto loop; } } @@ -3104,11 +3081,7 @@ void log_check_margins(void) /*===================*/ { -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug == 1) - return; -#endif - loop: +loop: log_flush_margin(); log_checkpoint_margin(); @@ -3140,9 +3113,12 @@ void logs_empty_and_mark_files_at_shutdown(void) /*=======================================*/ { - ib_uint64_t lsn; - ulint arch_log_no; - ibool server_busy; + ib_uint64_t lsn; + ulint arch_log_no; + ibool server_busy; + ulint count = 0; + ulint pending_io; + ulint active_thd; if (srv_print_verbose_log) { ut_print_timestamp(stderr); @@ -3155,6 +3131,8 @@ logs_empty_and_mark_files_at_shutdown(void) loop: os_thread_sleep(100000); + count++; + mutex_enter(&kernel_mutex); /* We need the monitor threads to stop before we proceed with @@ -3163,6 +3141,21 @@ loop: if (srv_error_monitor_active || srv_lock_timeout_active || srv_monitor_active) { + const char* thread_active = NULL; + + /* Print a message every 60 seconds if we are waiting + for the monitor thread to exit. Master and worker threads + check will be done later. 
*/ + if (srv_print_verbose_log && count > 600) { + + if (srv_error_monitor_active) { + thread_active = "srv_error_monitor_thread"; + } else if (srv_lock_timeout_active) { + thread_active = "srv_lock_timeout thread"; + } else if (srv_monitor_active) { + thread_active = "srv_monitor_thread"; + } + } mutex_exit(&kernel_mutex); @@ -3170,6 +3163,13 @@ loop: os_event_set(srv_monitor_event); os_event_set(srv_timeout_event); + if (thread_active) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for %s to exit\n", + thread_active); + count = 0; + } + goto loop; } @@ -3180,9 +3180,54 @@ loop: server_busy = trx_n_mysql_transactions > 0 || UT_LIST_GET_LEN(trx_sys->trx_list) > trx_n_prepared; + + if (server_busy) { + ulint total_trx = UT_LIST_GET_LEN(trx_sys->trx_list) + + trx_n_mysql_transactions; + + mutex_exit(&kernel_mutex); + + if (srv_print_verbose_log && count > 600) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for %lu " + "active transactions to finish\n", + (ulong) total_trx); + count = 0; + } + + goto loop; + } + mutex_exit(&kernel_mutex); - if (server_busy || srv_is_any_background_thread_active()) { + /* Check that the background threads are suspended */ + active_thd = srv_get_active_thread_type(); + + if (active_thd != ULINT_UNDEFINED) { + + /* The srv_lock_timeout_thread, srv_error_monitor_thread + and srv_monitor_thread should already exit by now. The + only threads to be suspended are the master threads + and worker threads (purge threads). 
Print the thread + type if any of such threads not in suspended mode */ + if (srv_print_verbose_log && count > 600) { + const char* thread_type = ""; + + switch (active_thd) { + case SRV_WORKER: + thread_type = "worker threads"; + break; + case SRV_MASTER: + thread_type = "master thread"; + break; + } + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for %s " + "to be suspended\n", thread_type); + count = 0; + } + goto loop; } @@ -3194,10 +3239,35 @@ loop: || log_sys->n_pending_writes; mutex_exit(&log_sys->mutex); - if (server_busy || !buf_pool_check_no_pending_io()) { + if (server_busy) { + if (srv_print_verbose_log && count > 600) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Pending checkpoint_writes: %lu\n" + " InnoDB: Pending log flush writes: %lu\n", + (ulong) log_sys->n_pending_checkpoint_writes, + (ulong) log_sys->n_pending_writes); + count = 0; + } + goto loop; } + pending_io = buf_pool_check_num_pending_io(); + + if (pending_io) { + if (srv_print_verbose_log && count > 600) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for %lu buffer page " + "I/Os to complete\n", + (ulong) pending_io); + count = 0; + } + + goto loop; + } + + #ifdef UNIV_LOG_ARCHIVE log_archive_all(); #endif /* UNIV_LOG_ARCHIVE */ @@ -3221,7 +3291,7 @@ loop: log_buffer_flush_to_disk(); /* Check that the background threads stay suspended */ - if (srv_is_any_background_thread_active()) { + if (srv_get_active_thread_type() != ULINT_UNDEFINED) { fprintf(stderr, "InnoDB: Warning: some background thread" " woke up during shutdown\n"); @@ -3230,7 +3300,7 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; fil_close_all_files(); - ut_a(!srv_is_any_background_thread_active()); + ut_a(srv_get_active_thread_type() == ULINT_UNDEFINED); return; } @@ -3268,7 +3338,7 @@ loop: mutex_exit(&log_sys->mutex); /* Check that the background threads stay suspended */ - if (srv_is_any_background_thread_active()) { + if (srv_get_active_thread_type() != 
ULINT_UNDEFINED) { fprintf(stderr, "InnoDB: Warning: some background thread woke up" " during shutdown\n"); @@ -3286,13 +3356,20 @@ loop: if (!buf_all_freed()) { + if (srv_print_verbose_log && count > 600) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for dirty buffer " + "pages to be flushed\n"); + count = 0; + } + goto loop; } srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Make some checks that the server really is quiet */ - ut_a(!srv_is_any_background_thread_active()); + ut_a(srv_get_active_thread_type() == ULINT_UNDEFINED); ut_a(buf_all_freed()); ut_a(lsn == log_sys->lsn); @@ -3314,7 +3391,7 @@ loop: fil_close_all_files(); /* Make some checks that the server really is quiet */ - ut_a(!srv_is_any_background_thread_active()); + ut_a(srv_get_active_thread_type() == ULINT_UNDEFINED); ut_a(buf_all_freed()); ut_a(lsn == log_sys->lsn); diff --git a/mem/mem0pool.c b/mem/mem0pool.c index dc68cf8eb24..50dbe526d64 100644 --- a/mem/mem0pool.c +++ b/mem/mem0pool.c @@ -228,11 +228,7 @@ mem_pool_create( pool = ut_malloc(sizeof(mem_pool_t)); - /* We do not set the memory to zero (FALSE) in the pool, - but only when allocated at a higher level in mem0mem.c. - This is to avoid masking useful Purify warnings. */ - - pool->buf = ut_malloc_low(size, FALSE, TRUE); + pool->buf = ut_malloc_low(size, TRUE); pool->size = size; mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL); diff --git a/mtr/mtr0mtr.c b/mtr/mtr0mtr.c index 08234609ff0..d852ed6f496 100644 --- a/mtr/mtr0mtr.c +++ b/mtr/mtr0mtr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/os/os0file.c b/os/os0file.c index 0b1294a169e..26636c8a6cb 100644 --- a/os/os0file.c +++ b/os/os0file.c @@ -3308,7 +3308,91 @@ retry: fprintf(stderr, "InnoDB: You can disable Linux Native AIO by" - " setting innodb_native_aio = off in my.cnf\n"); + " setting innodb_use_native_aio = 0 in my.cnf\n"); + return(FALSE); +} + +/******************************************************************//** +Checks if the system supports native linux aio. On some kernel +versions where native aio is supported it won't work on tmpfs. In such +cases we can't use native aio as it is not possible to mix simulated +and native aio. +@return: TRUE if supported, FALSE otherwise. */ +static +ibool +os_aio_native_aio_supported(void) +/*=============================*/ +{ + int fd; + byte* buf; + byte* ptr; + struct io_event io_event; + io_context_t io_ctx; + struct iocb iocb; + struct iocb* p_iocb; + int err; + + if (!os_aio_linux_create_io_ctx(1, &io_ctx)) { + /* The platform does not support native aio. */ + return(FALSE); + } + + /* Now check if tmpdir supports native aio ops. 
*/ + fd = innobase_mysql_tmpfile(); + + if (fd < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: unable to create " + "temp file to check native AIO support.\n"); + + return(FALSE); + } + + memset(&io_event, 0x0, sizeof(io_event)); + + buf = (byte*) ut_malloc(UNIV_PAGE_SIZE * 2); + ptr = (byte*) ut_align(buf, UNIV_PAGE_SIZE); + + /* Suppress valgrind warning. */ + memset(buf, 0x00, UNIV_PAGE_SIZE * 2); + + memset(&iocb, 0x0, sizeof(iocb)); + p_iocb = &iocb; + io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0); + + err = io_submit(io_ctx, 1, &p_iocb); + if (err >= 1) { + /* Now collect the submitted IO request. */ + err = io_getevents(io_ctx, 1, 1, &io_event, NULL); + } + + ut_free(buf); + close(fd); + + switch (err) { + case 1: + return(TRUE); + + case -EINVAL: + case -ENOSYS: + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Linux Native AIO is not" + " supported on tmpdir.\n" + "InnoDB: You can either move tmpdir to a" + " file system that supports native AIO\n" + "InnoDB: or you can set" + " innodb_use_native_aio to FALSE to avoid" + " this message.\n"); + + /* fall through. 
*/ + default: + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Linux Native AIO check" + " on tmpdir returned error[%d]\n", -err); + } + return(FALSE); } #endif /* LINUX_NATIVE_AIO */ @@ -3475,6 +3559,19 @@ os_aio_init( os_io_init_simple(); +#if defined(LINUX_NATIVE_AIO) + /* Check if native aio is supported on this system and tmpfs */ + if (srv_use_native_aio + && !os_aio_native_aio_supported()) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: Linux Native AIO" + " disabled.\n"); + srv_use_native_aio = FALSE; + } +#endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < n_segments; i++) { srv_set_io_thread_op_info(i, "not started yet"); } diff --git a/os/os0proc.c b/os/os0proc.c index 0f56a608f38..68321e1aaf9 100644 --- a/os/os0proc.c +++ b/os/os0proc.c @@ -111,9 +111,6 @@ os_mem_alloc_large( os_fast_mutex_lock(&ut_list_mutex); ut_total_allocated_memory += size; os_fast_mutex_unlock(&ut_list_mutex); -# ifdef UNIV_SET_MEM_TO_ZERO - memset(ptr, '\0', size); -# endif UNIV_MEM_ALLOC(ptr, size); return(ptr); } diff --git a/page/page0cur.c b/page/page0cur.c index 936762b986a..d49b121afab 100644 --- a/page/page0cur.c +++ b/page/page0cur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -900,7 +900,7 @@ page_cur_parse_insert_rec( ut_print_buf(stderr, ptr2, 300); putc('\n', stderr); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); ut_error; } @@ -1180,14 +1180,15 @@ page_cur_insert_rec_zip_reorg( /* Before trying to reorganize the page, store the number of preceding records on the page. */ pos = page_rec_get_n_recs_before(rec); + ut_ad(pos > 0); if (page_zip_reorganize(block, index, mtr)) { /* The page was reorganized: Find rec by seeking to pos, and update *current_rec. */ - rec = page + PAGE_NEW_INFIMUM; - - while (--pos) { - rec = page + rec_get_next_offs(rec, TRUE); + if (pos > 1) { + rec = page_rec_get_nth(page, pos - 1); + } else { + rec = page + PAGE_NEW_INFIMUM; } *current_rec = rec; @@ -1283,6 +1284,12 @@ page_cur_insert_rec_zip( insert_rec = page_cur_insert_rec_zip_reorg( current_rec, block, index, insert_rec, page, page_zip, mtr); +#ifdef UNIV_DEBUG + if (insert_rec) { + rec_offs_make_valid( + insert_rec, index, offsets); + } +#endif /* UNIV_DEBUG */ } return(insert_rec); diff --git a/page/page0page.c b/page/page0page.c index 4858929082a..4a389bbe5b8 100644 --- a/page/page0page.c +++ b/page/page0page.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -148,7 +148,7 @@ page_dir_find_owner_slot( fputs("\n" "InnoDB: on that page!\n", stderr); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); ut_error; } @@ -549,8 +549,7 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - ut_a((ibool)!!page_is_comp(new_page) - == dict_table_is_comp(index->table)); + btr_assert_not_corrupted(new_block, index); ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); @@ -570,8 +569,10 @@ page_copy_rec_list_end_no_locks( /* Track an assertion failure reported on the mailing list on June 18th, 2003 */ - buf_page_print(new_page, 0); - buf_page_print(page_align(rec), 0); + buf_page_print(new_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(page_align(rec), 0, + BUF_PAGE_PRINT_NO_CRASH); ut_print_timestamp(stderr); fprintf(stderr, @@ -1453,55 +1454,54 @@ page_dir_balance_slot( } } -#ifndef UNIV_HOTBACKUP /************************************************************//** -Returns the middle record of the record list. If there are an even number -of records in the list, returns the first record of the upper half-list. 
-@return middle record */ +Returns the nth record of the record list. +This is the inverse function of page_rec_get_n_recs_before(). +@return nth record */ UNIV_INTERN -rec_t* -page_get_middle_rec( -/*================*/ - page_t* page) /*!< in: page */ +const rec_t* +page_rec_get_nth_const( +/*===================*/ + const page_t* page, /*!< in: page */ + ulint nth) /*!< in: nth record */ { - page_dir_slot_t* slot; - ulint middle; + const page_dir_slot_t* slot; ulint i; ulint n_owned; - ulint count; - rec_t* rec; + const rec_t* rec; - /* This many records we must leave behind */ - middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; - - count = 0; + ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); for (i = 0;; i++) { slot = page_dir_get_nth_slot(page, i); n_owned = page_dir_slot_get_n_owned(slot); - if (count + n_owned > middle) { + if (n_owned > nth) { break; } else { - count += n_owned; + nth -= n_owned; } } ut_ad(i > 0); slot = page_dir_get_nth_slot(page, i - 1); - rec = (rec_t*) page_dir_slot_get_rec(slot); - rec = page_rec_get_next(rec); + rec = page_dir_slot_get_rec(slot); - /* There are now count records behind rec */ - - for (i = 0; i < middle - count; i++) { - rec = page_rec_get_next(rec); + if (page_is_comp(page)) { + do { + rec = page_rec_get_next_low(rec, TRUE); + ut_ad(rec); + } while (nth--); + } else { + do { + rec = page_rec_get_next_low(rec, FALSE); + ut_ad(rec); + } while (nth--); } return(rec); } -#endif /* !UNIV_HOTBACKUP */ /***************************************************************//** Returns the number of records before the given record in chain. 
@@ -1563,6 +1563,7 @@ page_rec_get_n_recs_before( n--; ut_ad(n >= 0); + ut_ad((ulint)n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); return((ulint) n); } @@ -1835,7 +1836,7 @@ page_check_dir( fprintf(stderr, "InnoDB: Page directory corruption:" " infimum not pointed to\n"); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); } if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) { @@ -1843,7 +1844,7 @@ page_check_dir( fprintf(stderr, "InnoDB: Page directory corruption:" " supremum not pointed to\n"); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); } } #endif /* !UNIV_HOTBACKUP */ @@ -2547,7 +2548,7 @@ func_exit2: (ulong) page_get_space_id(page), (ulong) page_get_page_no(page), index->name); - buf_page_print(page, 0); + buf_page_print(page, 0, 0); } return(ret); diff --git a/pars/pars0pars.c b/pars/pars0pars.c index ef107f2896f..86f54195682 100644 --- a/pars/pars0pars.c +++ b/pars/pars0pars.c @@ -1857,7 +1857,7 @@ pars_sql( ut_ad(str); - heap = mem_heap_create(256); + heap = mem_heap_create(16000); /* Currently, the parser is not reentrant: */ ut_ad(mutex_own(&(dict_sys->mutex))); diff --git a/row/row0ins.c b/row/row0ins.c index adc75bd5760..a629a4fb195 100644 --- a/row/row0ins.c +++ b/row/row0ins.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -23,6 +23,8 @@ Insert into a table Created 4/20/1996 Heikki Tuuri *******************************************************/ +#include "m_string.h" /* for my_sys.h */ +#include "my_sys.h" /* DEBUG_SYNC_C */ #include "row0ins.h" #ifdef UNIV_NONINL @@ -47,6 +49,8 @@ Created 4/20/1996 Heikki Tuuri #include "data0data.h" #include "usr0sess.h" #include "buf0lru.h" +#include "m_string.h" +#include "my_sys.h" #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 @@ -348,9 +352,9 @@ row_ins_clust_index_entry_by_modify( return(DB_LOCK_TABLE_FULL); } - err = btr_cur_pessimistic_update(0, cursor, - heap, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update( + BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update, + 0, thr, mtr); } return(err); @@ -950,7 +954,7 @@ row_ins_foreign_check_on_constraint( fputs("\n" "InnoDB: Submit a detailed bug report to" " http://bugs.mysql.com\n", stderr); - + ut_ad(0); err = DB_SUCCESS; goto nonstandard_exit_func; @@ -1083,6 +1087,9 @@ row_ins_foreign_check_on_constraint( release the latch. 
*/ row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); + + DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze"); + row_mysql_freeze_data_dictionary(thr_get_trx(thr)); mtr_start(mtr); @@ -1990,6 +1997,7 @@ row_ins_index_entry_low( ulint modify = 0; /* remove warning */ rec_t* insert_rec; rec_t* rec; + ulint* offsets; ulint err; ulint n_unique; big_rec_t* big_rec = NULL; @@ -2099,6 +2107,64 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &heap, &big_rec, entry, thr, &mtr); + + if (big_rec) { + ut_a(err == DB_SUCCESS); + /* Write out the externally stored + columns while still x-latching + index->lock and block->lock. Allocate + pages for big_rec in the mtr that + modified the B-tree, but be sure to skip + any pages that were freed in mtr. We will + write out the big_rec pages before + committing the B-tree mini-transaction. If + the system crashes so that crash recovery + will not replay the mtr_commit(&mtr), the + big_rec pages will be left orphaned until + the pages are allocated for something else. + + TODO: If the allocation extends the + tablespace, it will not be redo + logged, in either mini-transaction. + Tablespace extension should be + redo-logged in the big_rec + mini-transaction, so that recovery + will not fail when the big_rec was + written to the extended portion of the + file, in case the file was somehow + truncated in the crash. */ + + rec = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets( + rec, index, NULL, + ULINT_UNDEFINED, &heap); + + DEBUG_SYNC_C("before_row_ins_upd_extern"); + err = btr_store_big_rec_extern_fields( + index, btr_cur_get_block(&cursor), + rec, offsets, big_rec, &mtr, + BTR_STORE_INSERT_UPDATE); + DEBUG_SYNC_C("after_row_ins_upd_extern"); + /* If writing big_rec fails (for + example, because of DB_OUT_OF_FILE_SPACE), + the record will be corrupted. 
Even if + we did not update any externally + stored columns, our update could cause + the record to grow so that a + non-updated column was selected for + external storage. This non-update + would not have been written to the + undo log, and thus the record cannot + be rolled back. + + However, because we have not executed + mtr_commit(mtr) yet, the update will + not be replayed in crash recovery, and + the following assertion failure will + effectively "roll back" the operation. */ + ut_a(err == DB_SUCCESS); + goto stored_big_rec; + } } else { ut_ad(!n_ext); err = row_ins_sec_index_entry_by_modify( @@ -2145,8 +2211,13 @@ function_exit: return(err); } + DBUG_EXECUTE_IF( + "row_ins_extern_checkpoint", + log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);); + mtr_start(&mtr); + DEBUG_SYNC_C("before_row_ins_extern_latch"); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, &cursor, 0, __FILE__, __LINE__, &mtr); @@ -2154,10 +2225,13 @@ function_exit: offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); + DEBUG_SYNC_C("before_row_ins_extern"); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, &mtr, FALSE, big_rec); + rec, offsets, big_rec, &mtr, BTR_STORE_INSERT); + DEBUG_SYNC_C("after_row_ins_extern"); +stored_big_rec: if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/row/row0merge.c b/row/row0merge.c index ed5c0abc884..c303a372ed0 100644 --- a/row/row0merge.c +++ b/row/row0merge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -583,7 +583,7 @@ row_merge_buf_write( REC_STATUS_ORDINARY, entry, n_fields, &extra_size); - ut_ad(size > extra_size); + ut_ad(size >= extra_size); ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES); extra_size -= REC_N_NEW_EXTRA_BYTES; size -= REC_N_NEW_EXTRA_BYTES; @@ -1607,22 +1607,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - struct TABLE* table) /*!< in/out: MySQL table, for + struct TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remain + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1638,17 +1644,20 @@ row_merge( #endif /* POSIX_FADV_SEQUENTIAL */ /* Merge blocks to the output file. 
*/ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1656,21 +1665,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. */ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. */ @@ -1680,6 +1674,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1692,6 +1689,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1703,10 +1703,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. 
So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1731,27 +1744,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. */ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. */ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -2018,7 +2048,7 @@ row_merge_drop_index( tables in Innobase. Deleting a row from SYS_INDEXES table also frees the file segments of the B-tree associated with the index. 
*/ - static const char str1[] = + static const char sql[] = "PROCEDURE DROP_INDEX_PROC () IS\n" "BEGIN\n" /* Rename the index, so that it will be dropped by @@ -2044,9 +2074,19 @@ row_merge_drop_index( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - err = que_eval_sql(info, str1, FALSE, trx); + err = que_eval_sql(info, sql, FALSE, trx); - ut_a(err == DB_SUCCESS); + + if (err != DB_SUCCESS) { + /* Even though we ensure that DDL transactions are WAIT + and DEADLOCK free, we could encounter other errors e.g., + DB_TOO_MANY_TRANSACTIONS. */ + trx->error_state = DB_SUCCESS; + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: row_merge_drop_index failed " + "with error code: %lu.\n", (ulint) err); + } /* Replace this index with another equivalent index for all foreign key constraints on this table where this index is used */ @@ -2346,7 +2386,7 @@ row_merge_rename_indexes( /* We use the private SQL parser of Innobase to generate the query graphs needed in renaming indexes. */ - static const char rename_indexes[] = + static const char sql[] = "PROCEDURE RENAME_INDEXES_PROC () IS\n" "BEGIN\n" "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n" @@ -2362,7 +2402,7 @@ row_merge_rename_indexes( pars_info_add_ull_literal(info, "tableid", table->id); - err = que_eval_sql(info, rename_indexes, FALSE, trx); + err = que_eval_sql(info, sql, FALSE, trx); if (err == DB_SUCCESS) { dict_index_t* index = dict_table_get_first_index(table); @@ -2372,6 +2412,15 @@ row_merge_rename_indexes( } index = dict_table_get_next_index(index); } while (index); + } else { + /* Even though we ensure that DDL transactions are WAIT + and DEADLOCK free, we could encounter other errors e.g., + DB_TOO_MANY_TRANSACTIONS. 
*/ + trx->error_state = DB_SUCCESS; + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: row_merge_rename_indexes " + "failed with error code: %lu.\n", (ulint) err); } trx->op_info = ""; @@ -2409,7 +2458,7 @@ row_merge_rename_tables( memcpy(old_name, old_table->name, strlen(old_table->name) + 1); } else { ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: too long table name: '%s', " + fprintf(stderr, " InnoDB: too long table name: '%s', " "max length is %d\n", old_table->name, MAX_FULL_NAME_LEN); ut_error; diff --git a/row/row0mysql.c b/row/row0mysql.c index 5e75e487f6b..eb1eb095fd1 100644 --- a/row/row0mysql.c +++ b/row/row0mysql.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -52,6 +52,8 @@ Created 9/17/2000 Heikki Tuuri #include "fil0fil.h" #include "ibuf0ibuf.h" #include "ha_prototypes.h" +#include "m_string.h" +#include "my_sys.h" /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; @@ -668,17 +670,60 @@ UNIV_INTERN row_prebuilt_t* row_create_prebuilt( /*================*/ - dict_table_t* table) /*!< in: Innobase table handle */ + dict_table_t* table, /*!< in: Innobase table handle */ + ulint mysql_row_len) /*!< in: length in bytes of a row in + the MySQL format */ { row_prebuilt_t* prebuilt; mem_heap_t* heap; dict_index_t* clust_index; dtuple_t* ref; ulint ref_len; + ulint search_tuple_n_fields; - heap = mem_heap_create(sizeof *prebuilt + 128); + search_tuple_n_fields = 2 * dict_table_get_n_cols(table); - prebuilt = mem_heap_zalloc(heap, sizeof *prebuilt); + clust_index = dict_table_get_first_index(table); + + /* Make sure that search_tuple is long enough for clustered index */ + ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); + + ref_len = dict_index_get_n_unique(clust_index); + +#define PREBUILT_HEAP_INITIAL_SIZE \ + ( \ + sizeof(*prebuilt) \ + /* allocd in this function */ \ + + DTUPLE_EST_ALLOC(search_tuple_n_fields) \ + + DTUPLE_EST_ALLOC(ref_len) \ + /* allocd in row_prebuild_sel_graph() */ \ + + sizeof(sel_node_t) \ + + sizeof(que_fork_t) \ + + sizeof(que_thr_t) \ + /* allocd in row_get_prebuilt_update_vector() */ \ + + sizeof(upd_node_t) \ + + sizeof(upd_t) \ + + sizeof(upd_field_t) \ + * dict_table_get_n_cols(table) \ + + sizeof(que_fork_t) 
\ + + sizeof(que_thr_t) \ + /* allocd in row_get_prebuilt_insert_row() */ \ + + sizeof(ins_node_t) \ + /* mysql_row_len could be huge and we are not \ + sure if this prebuilt instance is going to be \ + used in inserts */ \ + + (mysql_row_len < 256 ? mysql_row_len : 0) \ + + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table)) \ + + sizeof(que_fork_t) \ + + sizeof(que_thr_t) \ + ) + + /* We allocate enough space for the objects that are likely to + be created later in order to minimize the number of malloc() + calls */ + heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE); + + prebuilt = mem_heap_zalloc(heap, sizeof(*prebuilt)); prebuilt->magic_n = ROW_PREBUILT_ALLOCATED; prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED; @@ -688,23 +733,15 @@ row_create_prebuilt( prebuilt->sql_stat_start = TRUE; prebuilt->heap = heap; - prebuilt->pcur = btr_pcur_create_for_mysql(); - prebuilt->clust_pcur = btr_pcur_create_for_mysql(); + btr_pcur_reset(&prebuilt->pcur); + btr_pcur_reset(&prebuilt->clust_pcur); prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, sizeof prebuilt->stored_select_lock_type); - prebuilt->search_tuple = dtuple_create( - heap, 2 * dict_table_get_n_cols(table)); - - clust_index = dict_table_get_first_index(table); - - /* Make sure that search_tuple is long enough for clustered index */ - ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); - - ref_len = dict_index_get_n_unique(clust_index); + prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields); ref = dtuple_create(heap, ref_len); @@ -721,6 +758,8 @@ row_create_prebuilt( prebuilt->autoinc_last_value = 0; + prebuilt->mysql_row_len = mysql_row_len; + return(prebuilt); } @@ -756,8 +795,8 @@ row_prebuilt_free( prebuilt->magic_n = ROW_PREBUILT_FREED; prebuilt->magic_n2 = ROW_PREBUILT_FREED; - btr_pcur_free_for_mysql(prebuilt->pcur); - btr_pcur_free_for_mysql(prebuilt->clust_pcur); + 
btr_pcur_reset(&prebuilt->pcur); + btr_pcur_reset(&prebuilt->clust_pcur); if (prebuilt->mysql_template) { mem_free(prebuilt->mysql_template); @@ -1418,6 +1457,8 @@ row_update_for_mysql( return(DB_ERROR); } + DEBUG_SYNC_C("innodb_row_update_for_mysql_begin"); + trx->op_info = "updating or deleting"; row_mysql_delay_if_needed(); @@ -1428,11 +1469,11 @@ row_update_for_mysql( clust_index = dict_table_get_first_index(table); - if (prebuilt->pcur->btr_cur.index == clust_index) { - btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur); + if (prebuilt->pcur.btr_cur.index == clust_index) { + btr_pcur_copy_stored_position(node->pcur, &prebuilt->pcur); } else { btr_pcur_copy_stored_position(node->pcur, - prebuilt->clust_pcur); + &prebuilt->clust_pcur); } ut_a(node->pcur->rel_pos == BTR_PCUR_ON); @@ -1537,8 +1578,8 @@ row_unlock_for_mysql( clust_pcur, and we do not need to reposition the cursors. */ { - btr_pcur_t* pcur = prebuilt->pcur; - btr_pcur_t* clust_pcur = prebuilt->clust_pcur; + btr_pcur_t* pcur = &prebuilt->pcur; + btr_pcur_t* clust_pcur = &prebuilt->clust_pcur; trx_t* trx = prebuilt->trx; ut_ad(prebuilt && trx); @@ -1760,7 +1801,7 @@ row_mysql_freeze_data_dictionary_func( { ut_a(trx->dict_operation_lock_mode == 0); - rw_lock_s_lock_func(&dict_operation_lock, 0, file, line); + rw_lock_s_lock_inline(&dict_operation_lock, 0, file, line); trx->dict_operation_lock_mode = RW_S_LATCH; } @@ -1797,7 +1838,7 @@ row_mysql_lock_data_dictionary_func( /* Serialize data dictionary operations with dictionary mutex: no deadlocks or lock waits can occur then in these operations */ - rw_lock_x_lock_func(&dict_operation_lock, 0, file, line); + rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line); trx->dict_operation_lock_mode = RW_X_LATCH; mutex_enter(&(dict_sys->mutex)); @@ -1968,6 +2009,21 @@ err_exit: } break; + case DB_TOO_MANY_CONCURRENT_TRXS: + /* We already have .ibd file here. it should be deleted. 
*/ + + if (table->space && !fil_delete_tablespace(table->space, + FALSE)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: not able to" + " delete tablespace %lu of table ", + (ulong) table->space); + ut_print_name(stderr, trx, TRUE, table->name); + fputs("!\n", stderr); + } + /* fall through */ + case DB_DUPLICATE_KEY: default: /* We may also get err == DB_ERROR if the .ibd file for the @@ -3145,6 +3201,7 @@ row_drop_table_for_mysql( { dict_foreign_t* foreign; dict_table_t* table; + dict_index_t* index; ulint space_id; ulint err; const char* table_name; @@ -3352,6 +3409,18 @@ check_next_foreign: trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = table->id; + /* Mark all indexes unavailable in the data dictionary cache + before starting to drop the table. */ + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + rw_lock_x_lock(dict_index_get_lock(index)); + ut_ad(!index->to_be_dropped); + index->to_be_dropped = TRUE; + rw_lock_x_unlock(dict_index_get_lock(index)); + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also @@ -3492,7 +3561,7 @@ check_next_foreign: "InnoDB: of table "); ut_print_name(stderr, trx, TRUE, name); fprintf(stderr, ".\n"); - } else if (!fil_delete_tablespace(space_id)) { + } else if (!fil_delete_tablespace(space_id, FALSE)) { fprintf(stderr, "InnoDB: We removed now the InnoDB" " internal data dictionary entry\n" @@ -3519,6 +3588,17 @@ check_next_foreign: the undo log. We can directly exit here and return the DB_TOO_MANY_CONCURRENT_TRXS error. */ + + /* Mark all indexes available in the data dictionary + cache again. 
*/ + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + rw_lock_x_lock(dict_index_get_lock(index)); + index->to_be_dropped = FALSE; + rw_lock_x_unlock(dict_index_get_lock(index)); + } break; case DB_OUT_OF_FILE_SPACE: @@ -3613,7 +3693,7 @@ row_mysql_drop_temp_tables(void) btr_pcur_store_position(&pcur, &mtr); btr_pcur_commit_specify_mtr(&pcur, &mtr); - table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); + table = dict_table_get_low(table_name); if (table) { row_drop_table_for_mysql(table_name, trx, FALSE); @@ -3875,6 +3955,7 @@ row_rename_table_for_mysql( ulint n_constraints_to_drop = 0; ibool old_is_tmp, new_is_tmp; pars_info_t* info = NULL; + int retry; ut_a(old_name != NULL); ut_a(new_name != NULL); @@ -3957,6 +4038,25 @@ row_rename_table_for_mysql( } } + /* Is a foreign key check running on this table? */ + for (retry = 0; retry < 100 + && table->n_foreign_key_checks_running > 0; ++retry) { + row_mysql_unlock_data_dictionary(trx); + os_thread_yield(); + row_mysql_lock_data_dictionary(trx); + } + + if (table->n_foreign_key_checks_running > 0) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: in ALTER TABLE ", stderr); + ut_print_name(stderr, trx, TRUE, old_name); + fprintf(stderr, "\n" + "InnoDB: a FOREIGN KEY check is running.\n" + "InnoDB: Cannot rename table.\n"); + err = DB_TABLE_IN_FK_CHECK; + goto funct_exit; + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in updating the dictionary data from system tables. */ diff --git a/row/row0row.c b/row/row0row.c index 0462f280858..2c33bdc4b15 100644 --- a/row/row0row.c +++ b/row/row0row.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -241,13 +241,18 @@ row_build( ut_ad(rec_offs_validate(rec, index, offsets)); } -#if 0 && defined UNIV_BLOB_NULL_DEBUG - /* This one can fail in trx_rollback_active() if - the server crashed during an insert before the - btr_store_big_rec_extern_fields() did mtr_commit() - all BLOB pointers to the clustered index record. */ - ut_a(!rec_offs_any_null_extern(rec, offsets)); -#endif /* 0 && UNIV_BLOB_NULL_DEBUG */ +#ifdef UNIV_BLOB_NULL_DEBUG + if (rec_offs_any_null_extern(rec, offsets)) { + /* This condition can occur during crash recovery + before trx_rollback_active() has completed execution, + or when a concurrently executing + row_ins_index_entry_low() has committed the B-tree + mini-transaction but has not yet managed to restore + the cursor position for writing the big_rec. */ + ut_a(trx_undo_roll_ptr_is_insert( + row_get_rec_roll_ptr(rec, index, offsets))); + } +#endif /* UNIV_BLOB_NULL_DEBUG */ if (type != ROW_COPY_POINTERS) { /* Take a copy of rec to heap */ diff --git a/row/row0sel.c b/row/row0sel.c index 7079e96bb12..a3ea482ad4a 100644 --- a/row/row0sel.c +++ b/row/row0sel.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. 
All Rights Reserved. +Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -2301,7 +2301,12 @@ row_sel_convert_mysql_key_to_innobase( in the tuple is already according to index! */ byte* buf, /*!< in: buffer to use in field - conversions */ + conversions; NOTE that dtuple->data + may end up pointing inside buf so + do not discard that buffer while + the tuple is being used. See + row_mysql_store_col_in_innobase_format() + in the case of DATA_INT */ ulint buf_len, /*!< in: buffer length */ dict_index_t* index, /*!< in: index of the key value */ const byte* key_ptr, /*!< in: MySQL key value */ @@ -2433,6 +2438,7 @@ row_sel_convert_mysql_key_to_innobase( /* Storing may use at most data_len bytes of buf */ if (UNIV_LIKELY(!is_null)) { + ut_a(buf + data_len <= original_buf + buf_len); row_mysql_store_col_in_innobase_format( dfield, buf, FALSE, /* MySQL key value format col */ @@ -2915,17 +2921,17 @@ row_sel_get_clust_rec_for_mysql( btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref, PAGE_CUR_LE, BTR_SEARCH_LEAF, - prebuilt->clust_pcur, 0, mtr); + &prebuilt->clust_pcur, 0, mtr); - clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur); + clust_rec = btr_pcur_get_rec(&prebuilt->clust_pcur); - prebuilt->clust_pcur->trx_if_known = trx; + prebuilt->clust_pcur.trx_if_known = trx; /* Note: only if the search ends up on a non-infimum record is the low_match value the real match to the search tuple */ if (!page_rec_is_user_rec(clust_rec) - || btr_pcur_get_low_match(prebuilt->clust_pcur) + || btr_pcur_get_low_match(&prebuilt->clust_pcur) < dict_index_get_n_unique(clust_index)) { /* In a rare case it is possible that no clust rec is found @@ -2957,6 +2963,7 @@ row_sel_get_clust_rec_for_mysql( fputs("\n" "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); + ut_ad(0); } clust_rec = NULL; @@ -2974,7 +2981,7 @@ 
row_sel_get_clust_rec_for_mysql( we set a LOCK_REC_NOT_GAP type lock */ err = lock_clust_rec_read_check_and_lock( - 0, btr_pcur_get_block(prebuilt->clust_pcur), + 0, btr_pcur_get_block(&prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); switch (err) { @@ -3052,7 +3059,7 @@ func_exit: /* We may use the cursor in update or in unlock_row(): store its position */ - btr_pcur_store_position(prebuilt->clust_pcur, mtr); + btr_pcur_store_position(&prebuilt->clust_pcur, mtr); } err_exit: @@ -3300,7 +3307,7 @@ row_sel_try_search_shortcut_for_mysql( { dict_index_t* index = prebuilt->index; const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* pcur = &prebuilt->pcur; trx_t* trx = prebuilt->trx; const rec_t* rec; @@ -3389,7 +3396,7 @@ row_search_for_mysql( dict_index_t* index = prebuilt->index; ibool comp = dict_table_is_comp(index->table); const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* pcur = &prebuilt->pcur; trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; @@ -4073,7 +4080,8 @@ wrong_offs: if ((srv_force_recovery == 0 || moves_up == FALSE) && srv_pass_corrupt_table <= 1) { ut_print_timestamp(stderr); - buf_page_print(page_align(rec), 0); + buf_page_print(page_align(rec), 0, + BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, "\nInnoDB: rec address %p," " buf block fix count %lu\n", @@ -4092,7 +4100,7 @@ wrong_offs: "InnoDB: restore from a backup, or" " dump + drop + reimport the table.\n", stderr); - + ut_ad(0); err = DB_CORRUPTION; goto lock_wait_or_error; @@ -4445,7 +4453,9 @@ no_gap_lock: applicable to unique secondary indexes. 
Current behaviour is to widen the scope of a lock on an already delete marked record if the same record is deleted twice by the same transaction */ - if (index == clust_index && unique_search) { + if (index == clust_index && unique_search + && !prebuilt->used_in_HANDLER) { + err = DB_RECORD_NOT_FOUND; goto normal_return; diff --git a/row/row0umod.c b/row/row0umod.c index b86ce9eeabd..9597c476125 100644 --- a/row/row0umod.c +++ b/row/row0umod.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -509,6 +509,7 @@ row_undo_mod_del_unmark_sec_and_undo_update( fputs("\n" "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); + ut_ad(0); break; case ROW_FOUND: btr_cur = btr_pcur_get_btr_cur(&pcur); diff --git a/row/row0upd.c b/row/row0upd.c index 45c19b0ead0..28c770460ff 100644 --- a/row/row0upd.c +++ b/row/row0upd.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -23,6 +23,8 @@ Update of a row Created 12/27/1996 Heikki Tuuri *******************************************************/ +#include "m_string.h" /* for my_sys.h */ +#include "my_sys.h" /* DEBUG_SYNC_C */ #include "row0upd.h" #ifdef UNIV_NONINL @@ -1636,6 +1638,7 @@ row_upd_sec_index_entry( fputs("\n" "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); + ut_ad(0); break; case ROW_FOUND: /* Delete mark the old index record; it can already be @@ -2021,29 +2024,65 @@ row_upd_clust_rec( ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, - &heap, &big_rec, node->update, - node->cmpl_info, thr, mtr); - mtr_commit(mtr); + err = btr_cur_pessimistic_update( + BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, + &heap, &big_rec, node->update, + node->cmpl_info, thr, mtr); /* skip store extern for fake_changes */ if (err == DB_SUCCESS && big_rec && !(thr_get_trx(thr)->fake_changes)) { - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_t* rec; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + rec_t* rec; rec_offs_init(offsets_); - mtr_start(mtr); + ut_a(err == DB_SUCCESS); + /* Write out the externally stored + columns while still x-latching + index->lock and block->lock. Allocate + pages for big_rec in the mtr that + modified the B-tree, but be sure to skip + any pages that were freed in mtr. We will + write out the big_rec pages before + committing the B-tree mini-transaction. 
If + the system crashes so that crash recovery + will not replay the mtr_commit(&mtr), the + big_rec pages will be left orphaned until + the pages are allocated for something else. + + TODO: If the allocation extends the tablespace, it + will not be redo logged, in either mini-transaction. + Tablespace extension should be redo-logged in the + big_rec mini-transaction, so that recovery will not + fail when the big_rec was written to the extended + portion of the file, in case the file was somehow + truncated in the crash. */ - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); + DEBUG_SYNC_C("before_row_upd_extern"); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - mtr, TRUE, big_rec); - mtr_commit(mtr); + big_rec, mtr, BTR_STORE_UPDATE); + DEBUG_SYNC_C("after_row_upd_extern"); + /* If writing big_rec fails (for example, because of + DB_OUT_OF_FILE_SPACE), the record will be corrupted. + Even if we did not update any externally stored + columns, our update could cause the record to grow so + that a non-updated column was selected for external + storage. This non-update would not have been written + to the undo log, and thus the record cannot be rolled + back. + + However, because we have not executed mtr_commit(mtr) + yet, the update will not be replayed in crash + recovery, and the following assertion failure will + effectively "roll back" the operation. */ + ut_a(err == DB_SUCCESS); } + mtr_commit(mtr); + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } diff --git a/srv/srv0srv.c b/srv/srv0srv.c index d5f94768dba..0a72eb106a3 100644 --- a/srv/srv0srv.c +++ b/srv/srv0srv.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. 
Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -433,6 +433,9 @@ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; UNIV_INTERN ulint srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; +#ifdef UNIV_DEBUG +UNIV_INTERN ulong srv_sys_stats_root_page = 0; +#endif UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; @@ -471,9 +474,6 @@ UNIV_INTERN ibool srv_print_lock_waits = FALSE; UNIV_INTERN ibool srv_print_buf_io = FALSE; UNIV_INTERN ibool srv_print_log_io = FALSE; UNIV_INTERN ibool srv_print_latch_waits = FALSE; - -UNIV_INTERN ulong srv_flush_checkpoint_debug = 0; - #endif /* UNIV_DEBUG */ UNIV_INTERN ulint srv_n_rows_inserted = 0; @@ -2802,6 +2802,7 @@ loop: "InnoDB: Please submit a bug report" " to http://bugs.mysql.com\n", old_lsn, new_lsn); + ut_ad(0); } old_lsn = new_lsn; @@ -2953,21 +2954,23 @@ exit_func: } /**********************************************************************//** -Check whether any background thread is active. -@return FALSE if all are are suspended or have exited. */ +Check whether any background thread is active. If so return the thread +type +@return ULINT_UNDEFINED if all are suspended or have exited, thread +type if any are still active. 
*/ UNIV_INTERN -ibool -srv_is_any_background_thread_active(void) -/*=====================================*/ +ulint +srv_get_active_thread_type(void) +/*============================*/ { ulint i; - ibool ret = FALSE; + ibool ret = ULINT_UNDEFINED; mutex_enter(&kernel_mutex); for (i = 0; i <= SRV_MASTER; ++i) { if (srv_n_threads_active[i] != 0) { - ret = TRUE; + ret = i; break; } } @@ -2977,6 +2980,57 @@ srv_is_any_background_thread_active(void) return(ret); } +/*********************************************************************//** +This function prints progress message every 60 seconds during server +shutdown, for any activities that master thread is pending on. */ +static +void +srv_shutdown_print_master_pending( +/*==============================*/ + ib_time_t* last_print_time, /*!< last time the function + print the message */ + ulint n_tables_to_drop, /*!< number of tables to + be dropped */ + ulint n_bytes_merged, /*!< number of change buffer + just merged */ + ulint n_pages_flushed) /*!< number of pages flushed */ +{ + ib_time_t current_time; + double time_elapsed; + + current_time = ut_time(); + time_elapsed = ut_difftime(current_time, *last_print_time); + + if (time_elapsed > 60) { + *last_print_time = ut_time(); + + if (n_tables_to_drop) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for " + "%lu table(s) to be dropped\n", + (ulong) n_tables_to_drop); + } + + /* Check change buffer merge, we only wait for change buffer + merge if it is a slow shutdown */ + if (!srv_fast_shutdown && n_bytes_merged) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for change " + "buffer merge to complete\n" + " InnoDB: number of bytes of change buffer " + "just merged: %lu\n", + n_bytes_merged); + } + + if (n_pages_flushed) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for " + "%lu pages to be flushed\n", + (ulong) n_pages_flushed); + } + } +} + 
/*******************************************************************//** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used @@ -3146,6 +3200,7 @@ srv_master_thread( ib_uint64_t lsn_old; ib_uint64_t oldest_lsn; + ib_time_t last_print_time; #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Master thread starts, id %lu\n", @@ -3170,6 +3225,9 @@ srv_master_thread( mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + + last_print_time = ut_time(); + loop: /*****************************************************************/ /* ---- When there is database activity by users, we cycle in this @@ -3620,18 +3678,11 @@ retry_flush_batch: PCT_IO(10), IB_ULONGLONG_MAX); } -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug != 1) { -#endif + srv_main_thread_op_info = "making checkpoint"; - srv_main_thread_op_info = "making checkpoint"; + /* Make a new checkpoint about once in 10 seconds */ - /* Make a new checkpoint about once in 10 seconds */ - - log_checkpoint(TRUE, FALSE); -#ifdef UNIV_DEBUG - } -#endif + log_checkpoint(TRUE, FALSE); srv_main_thread_op_info = "reserving kernel mutex"; @@ -3710,10 +3761,6 @@ background_loop: } mutex_exit(&kernel_mutex); -#ifdef UNIV_DEBUG - if (srv_flush_checkpoint_debug == 1) - goto skip_flush; -#endif flush_loop: srv_main_thread_op_info = "flushing buffer pool pages"; srv_main_flush_loops++; @@ -3754,9 +3801,6 @@ flush_loop: goto flush_loop; } -#ifdef UNIV_DEBUG -skip_flush: -#endif srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); @@ -3772,6 +3816,14 @@ skip_flush: */ n_bytes_archived = 0; + /* Print progress message every 60 seconds during shutdown */ + if (srv_shutdown_state > 0 && srv_print_verbose_log) { + srv_shutdown_print_master_pending(&last_print_time, + n_tables_to_drop, + n_bytes_merged, + n_pages_flushed); + } + /* Keep looping in the background loop if still work to do */ if 
(srv_fast_shutdown && srv_shutdown_state > 0) { @@ -3790,6 +3842,7 @@ skip_flush: } else if (n_tables_to_drop + n_pages_purged + n_bytes_merged + n_pages_flushed + n_bytes_archived != 0) { + /* In a 'slow' shutdown we run purge and the insert buffer merge to completion */ diff --git a/srv/srv0start.c b/srv/srv0start.c index b11e63a8e56..34a2fdaa42a 100644 --- a/srv/srv0start.c +++ b/srv/srv0start.c @@ -925,8 +925,9 @@ skip_size_check: #endif /* UNIV_LOG_ARCHIVE */ min_flushed_lsn, max_flushed_lsn); - if (UNIV_PAGE_SIZE - != fsp_flags_get_page_size(flags)) { + if (!one_opened + && UNIV_PAGE_SIZE + != fsp_flags_get_page_size(flags)) { ut_print_timestamp(stderr); fprintf(stderr, @@ -1936,6 +1937,12 @@ innobase_start_or_create_for_mysql(void) trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); } + + if (UNIV_UNLIKELY(!dict_verify_xtradb_sys_stats())) { + fprintf(stderr, "InnoDB: Warning: " + "SYS_STATS table corrupted, recreating\n"); + dict_recreate_xtradb_sys_stats(); + } } if (!create_new_db && sum_of_new_sizes > 0) { diff --git a/sync/sync0rw.c b/sync/sync0rw.c index 8884812d84d..5068d1679c0 100644 --- a/sync/sync0rw.c +++ b/sync/sync0rw.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -780,7 +780,9 @@ rw_lock_add_debug_info( rw_lock_debug_mutex_exit(); if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_add_level(lock, lock->level); + sync_thread_add_level(lock, lock->level, + lock_type == RW_LOCK_EX + && lock->lock_word < 0); } } diff --git a/sync/sync0sync.c b/sync/sync0sync.c index d2c4617d65c..7654fade6e8 100644 --- a/sync/sync0sync.c +++ b/sync/sync0sync.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -700,7 +700,7 @@ mutex_set_debug_info( ut_ad(mutex); ut_ad(file_name); - sync_thread_add_level(mutex, mutex->level); + sync_thread_add_level(mutex, mutex->level, FALSE); mutex->file_name = file_name; mutex->line = line; @@ -1142,8 +1142,9 @@ void sync_thread_add_level( /*==================*/ void* latch, /*!< in: pointer to a mutex or an rw-lock */ - ulint level) /*!< in: level in the latching order; if + ulint level, /*!< in: level in the latching order; if SYNC_LEVEL_VARYING, nothing is done */ + ibool relock) /*!< in: TRUE if re-entering an x-lock */ { ulint i; sync_level_t* slot; @@ -1194,6 +1195,10 @@ sync_thread_add_level( array = thread_slot->levels; + if (relock) { + goto levels_ok; + } + /* NOTE that there is a problem with _NODE and _LEAF levels: if the B-tree height changes, then a leaf can change to an internal node or the other way around. We do not know at present if this can cause @@ -1376,6 +1381,7 @@ sync_thread_add_level( ut_error; } +levels_ok: if (array->next_free == ULINT_UNDEFINED) { ut_a(array->n_elems < array->max_elems); diff --git a/trx/trx0purge.c b/trx/trx0purge.c index eb4fa80fa40..122aab119ba 100644 --- a/trx/trx0purge.c +++ b/trx/trx0purge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -730,6 +730,7 @@ trx_purge_rseg_get_next_history_log( "InnoDB: report, and submit it" " to http://bugs.mysql.com\n", (ulong) trx_sys->rseg_history_len); + ut_ad(0); } mutex_exit(&kernel_mutex); @@ -1200,7 +1201,7 @@ trx_purge( (ulong) purge_sys->n_pages_handled); } - return(purge_sys->n_pages_handled - old_pages_handled); + return((ulint) (purge_sys->n_pages_handled - old_pages_handled)); } /******************************************************************//** diff --git a/trx/trx0rec.c b/trx/trx0rec.c index 104b80f5d96..db4897c368d 100644 --- a/trx/trx0rec.c +++ b/trx/trx0rec.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -1016,6 +1016,7 @@ trx_undo_update_rec_get_update( fprintf(stderr, "\n" "InnoDB: n_fields = %lu, i = %lu, ptr %p\n", (ulong) n_fields, (ulong) i, ptr); + ut_ad(0); *upd = NULL; return(NULL); } @@ -1210,6 +1211,7 @@ trx_undo_report_row_operation( trx_t* trx; trx_undo_t* undo; ulint page_no; + buf_block_t* undo_block; trx_rseg_t* rseg; mtr_t mtr; ulint err = DB_SUCCESS; @@ -1252,10 +1254,13 @@ trx_undo_report_row_operation( if (UNIV_UNLIKELY(!undo)) { /* Did not succeed */ + ut_ad(err != DB_SUCCESS); mutex_exit(&(trx->undo_mutex)); return(err); } + + ut_ad(err == DB_SUCCESS); } else { ut_ad(op_type == TRX_UNDO_MODIFY_OP); @@ -1269,30 +1274,30 @@ trx_undo_report_row_operation( if (UNIV_UNLIKELY(!undo)) { /* Did not succeed */ + ut_ad(err != DB_SUCCESS); mutex_exit(&(trx->undo_mutex)); return(err); } + ut_ad(err == DB_SUCCESS); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); } - page_no = undo->last_page_no; - mtr_start(&mtr); + page_no = undo->last_page_no; + undo_block = buf_page_get_gen( + undo->space, undo->zip_size, page_no, RW_X_LATCH, + undo->guess_block, BUF_GET, __FILE__, __LINE__, &mtr); + buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE); + do { - buf_block_t* undo_block; page_t* undo_page; ulint offset; - undo_block = buf_page_get_gen(undo->space, undo->zip_size, - page_no, RW_X_LATCH, - undo->guess_block, BUF_GET, - __FILE__, __LINE__, &mtr); - buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE); - undo_page = buf_block_get_frame(undo_block); + ut_ad(page_no == buf_block_get_page_no(undo_block)); if (op_type == 
TRX_UNDO_INSERT_OP) { offset = trx_undo_page_report_insert( @@ -1369,12 +1374,11 @@ trx_undo_report_row_operation( a pessimistic insert in a B-tree, and we must reserve the counterpart of the tree latch, which is the rseg mutex. */ - mutex_enter(&(rseg->mutex)); - - page_no = trx_undo_add_page(trx, undo, &mtr); - - mutex_exit(&(rseg->mutex)); - } while (UNIV_LIKELY(page_no != FIL_NULL)); + mutex_enter(&rseg->mutex); + undo_block = trx_undo_add_page(trx, undo, &mtr); + mutex_exit(&rseg->mutex); + page_no = undo->last_page_no; + } while (undo_block != NULL); /* Did not succeed: out of space */ err = DB_OUT_OF_FILE_SPACE; @@ -1526,6 +1530,7 @@ trx_undo_prev_version_build( "InnoDB: record version ", stderr); rec_print_new(stderr, rec, offsets); putc('\n', stderr); + ut_ad(0); return(DB_ERROR); } @@ -1631,6 +1636,7 @@ trx_undo_prev_version_build( (ullint) old_roll_ptr, (ullint) roll_ptr); trx_purge_sys_print(); + ut_ad(0); return(DB_ERROR); } diff --git a/trx/trx0sys.c b/trx/trx0sys.c index 548f383742f..89fab47a0ad 100644 --- a/trx/trx0sys.c +++ b/trx/trx0sys.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -137,6 +137,11 @@ UNIV_INTERN mysql_pfs_key_t trx_doublewrite_mutex_key; UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key; #endif /* UNIV_PFS_MUTEX */ +#ifdef UNIV_DEBUG +/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */ +uint trx_rseg_n_slots_debug = 0; +#endif + #ifndef UNIV_HOTBACKUP /** This is used to track the maximum file format id known to InnoDB. It's updated via SET GLOBAL innodb_file_format_max = 'x' or when we open @@ -253,9 +258,7 @@ trx_sys_create_doublewrite_buf(void) { buf_block_t* block; buf_block_t* block2; -#ifdef UNIV_SYNC_DEBUG buf_block_t* new_block; -#endif /* UNIV_SYNC_DEBUG */ byte* doublewrite; byte* fseg_header; ulint page_no; @@ -334,10 +337,9 @@ start_again: for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2; i++) { - page_no = fseg_alloc_free_page(fseg_header, - prev_page_no + 1, - FSP_UP, &mtr); - if (page_no == FIL_NULL) { + new_block = fseg_alloc_free_page( + fseg_header, prev_page_no + 1, FSP_UP, &mtr); + if (new_block == NULL) { fprintf(stderr, "InnoDB: Cannot create doublewrite" " buffer: you must\n" @@ -358,13 +360,8 @@ start_again: the page position in the tablespace, then the page has not been written to in doublewrite. 
*/ -#ifdef UNIV_SYNC_DEBUG - new_block = -#endif /* UNIV_SYNC_DEBUG */ - buf_page_get(TRX_SYS_SPACE, 0, page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(new_block, - SYNC_NO_ORDER_CHECK); + ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); + page_no = buf_block_get_page_no(new_block); if (i == FSP_EXTENT_SIZE / 2) { ut_a(page_no == FSP_EXTENT_SIZE); @@ -434,7 +431,10 @@ start_again: /* The doublewrite buffer has already been created: just read in some numbers */ + trx_doublewrite_init(doublewrite); + mtr_commit(&mtr); + trx_doublewrite_buf_is_being_created = FALSE; } else { fprintf(stderr, "InnoDB: Doublewrite buffer not found in the doublewrite file:" @@ -481,13 +481,12 @@ start_again: for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2; i++) { - page_no = fseg_alloc_free_page(fseg_header, - prev_page_no + 1, - FSP_UP, &mtr); - if (page_no == FIL_NULL) { + new_block = fseg_alloc_free_page( + fseg_header, prev_page_no + 1, FSP_UP, &mtr); + if (new_block == NULL) { fprintf(stderr, - "InnoDB: Cannot create the doublewrite" - " buffer: You must\n" + "InnoDB: Cannot create doublewrite" + " buffer: you must\n" "InnoDB: increase your" " tablespace size.\n" "InnoDB: Cannot continue operation.\n" @@ -505,13 +504,8 @@ start_again: the page position in the tablespace, then the page has not been written to in doublewrite. 
*/ -#ifdef UNIV_SYNC_DEBUG - new_block = -#endif /* UNIV_SYNC_DEBUG */ - buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(new_block, - SYNC_NO_ORDER_CHECK); + ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); + page_no = buf_block_get_page_no(new_block); if (i == FSP_EXTENT_SIZE / 2) { ut_a(page_no == FSP_EXTENT_SIZE); @@ -746,12 +740,16 @@ trx_sys_doublewrite_init_or_restore_pages( if (buf_page_is_corrupted(page, zip_size)) { fprintf(stderr, "InnoDB: Dump of the page:\n"); - buf_page_print(read_buf, zip_size); + buf_page_print( + read_buf, zip_size, + BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, "InnoDB: Dump of" " corresponding page" " in doublewrite buffer:\n"); - buf_page_print(page, zip_size); + buf_page_print( + page, zip_size, + BUF_PAGE_PRINT_NO_CRASH); fprintf(stderr, "InnoDB: Also the page in the" @@ -765,7 +763,7 @@ trx_sys_doublewrite_init_or_restore_pages( "InnoDB: option:\n" "InnoDB:" " innodb_force_recovery=6\n"); - exit(1); + ut_error; } /* Write the good page from the diff --git a/trx/trx0trx.c b/trx/trx0trx.c index a14f5db8176..6cf97f65dd0 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -1140,6 +1140,8 @@ trx_commit_off_kernel( ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); + + trx->error_state = DB_SUCCESS; } /****************************************************************//** diff --git a/trx/trx0undo.c b/trx/trx0undo.c index dae0637f72c..3d794c69c8b 100644 --- a/trx/trx0undo.c +++ b/trx/trx0undo.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -876,9 +876,9 @@ trx_undo_discard_latest_update_undo( #ifndef UNIV_HOTBACKUP /********************************************************************//** Tries to add a page to the undo log segment where the undo log is placed. -@return page number if success, else FIL_NULL */ +@return X-latched block if success, else NULL */ UNIV_INTERN -ulint +buf_block_t* trx_undo_add_page( /*==============*/ trx_t* trx, /*!< in: transaction */ @@ -888,11 +888,10 @@ trx_undo_add_page( the rollback segment mutex */ { page_t* header_page; + buf_block_t* new_block; page_t* new_page; trx_rseg_t* rseg; - ulint page_no; ulint n_reserved; - ibool success; ut_ad(mutex_own(&(trx->undo_mutex))); ut_ad(!mutex_own(&kernel_mutex)); @@ -902,37 +901,37 @@ trx_undo_add_page( if (rseg->curr_size == rseg->max_size) { - return(FIL_NULL); + return(NULL); } header_page = trx_undo_page_get(undo->space, undo->zip_size, undo->hdr_page_no, mtr); - success = fsp_reserve_free_extents(&n_reserved, undo->space, 1, - FSP_UNDO, mtr); - if (!success) { + if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1, + FSP_UNDO, mtr)) { - return(FIL_NULL); + return(NULL); } - page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER, - undo->top_page_no + 
1, FSP_UP, - TRUE, mtr); + new_block = fseg_alloc_free_page_general( + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + header_page, + undo->top_page_no + 1, FSP_UP, TRUE, mtr, mtr); fil_space_release_free_extents(undo->space, n_reserved); - if (page_no == FIL_NULL) { + if (new_block == NULL) { /* No space left */ - return(FIL_NULL); + return(NULL); } - undo->last_page_no = page_no; + ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); + buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE); + undo->last_page_no = buf_block_get_page_no(new_block); - new_page = trx_undo_page_get(undo->space, undo->zip_size, - page_no, mtr); + new_page = buf_block_get_frame(new_block); trx_undo_page_init(new_page, undo->type, mtr); @@ -941,7 +940,7 @@ trx_undo_add_page( undo->size++; rseg->curr_size++; - return(page_no); + return(new_block); } /********************************************************************//** diff --git a/ut/ut0mem.c b/ut/ut0mem.c index 303fdd6dd44..cb6b050beca 100644 --- a/ut/ut0mem.c +++ b/ut/ut0mem.c @@ -84,17 +84,13 @@ ut_mem_init(void) #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. +Allocates memory. 
@return own: allocated memory */ UNIV_INTERN void* ut_malloc_low( /*==========*/ ulint n, /*!< in: number of bytes to allocate */ - ibool set_to_zero, /*!< in: TRUE if allocated memory should be - set to zero if UNIV_SET_MEM_TO_ZERO is - defined */ ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the memory cannot be allocated */ { @@ -106,12 +102,6 @@ ut_malloc_low( ret = malloc(n); ut_a(ret || !assert_on_error); -#ifdef UNIV_SET_MEM_TO_ZERO - if (set_to_zero) { - memset(ret, '\0', n); - UNIV_MEM_ALLOC(ret, n); - } -#endif return(ret); } @@ -193,12 +183,6 @@ retry: } } - if (set_to_zero) { -#ifdef UNIV_SET_MEM_TO_ZERO - memset(ret, '\0', n + sizeof(ut_mem_block_t)); -#endif - } - UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t)); ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); @@ -215,74 +199,10 @@ retry: void* ret = malloc(n); ut_a(ret || !assert_on_error); -# ifdef UNIV_SET_MEM_TO_ZERO - if (set_to_zero) { - memset(ret, '\0', n); - } -# endif return(ret); #endif /* !UNIV_HOTBACKUP */ } -/**********************************************************************//** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc( -/*======*/ - ulint n) /*!< in: number of bytes to allocate */ -{ -#ifndef UNIV_HOTBACKUP - return(ut_malloc_low(n, TRUE, TRUE)); -#else /* !UNIV_HOTBACKUP */ - return(malloc(n)); -#endif /* !UNIV_HOTBACKUP */ -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. Prints to -stderr a message if fails. 
-@return TRUE if succeeded */ -UNIV_INTERN -ibool -ut_test_malloc( -/*===========*/ - ulint n) /*!< in: try to allocate this many bytes */ -{ - void* ret; - - ret = malloc(n); - - if (ret == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: cannot allocate" - " %lu bytes of memory for\n" - "InnoDB: a BLOB with malloc! Total allocated memory\n" - "InnoDB: by InnoDB %lu bytes." - " Operating system errno: %d\n" - "InnoDB: Check if you should increase" - " the swap file or\n" - "InnoDB: ulimits of your operating system.\n" - "InnoDB: On FreeBSD check you have" - " compiled the OS with\n" - "InnoDB: a big enough maximum process size.\n", - (ulong) n, - (ulong) ut_total_allocated_memory, - (int) errno); - return(FALSE); - } - - free(ret); - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - /**********************************************************************//** Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is a nop. */ diff --git a/ut/ut0ut.c b/ut/ut0ut.c index f6dfb3ba0b3..117a777cb98 100644 --- a/ut/ut0ut.c +++ b/ut/ut0ut.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, Oracle Corpn. All Rights Reserved. +Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -718,6 +718,8 @@ ut_strerr( return("Undo record too big"); case DB_END_OF_INDEX: return("End of index"); + case DB_TABLE_IN_FK_CHECK: + return("Table is being used in foreign key check"); /* do not add default: in order to produce a warning if new code is added to the enum but not added here */ } From 13982b5a118ab9060d786da646af94d81638bc3b Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 15 Jun 2012 10:26:06 +0200 Subject: [PATCH 38/39] comments --- storage/xtradb/include/univ.i | 3 +++ support-files/rpm/server.cnf | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 1a48d68d0d7..dd108bcbfe3 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -54,6 +54,9 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX 8 #ifndef PERCONA_INNODB_VERSION +/* this is *not* the version of XtraDB as in Percona-Server sources, + but the version of Percona-Server, where this XtraDB was taken from. + Because Percona does not update XtraDB version for every release */ #define PERCONA_INNODB_VERSION 24.1 #endif diff --git a/support-files/rpm/server.cnf b/support-files/rpm/server.cnf index 8cf2c74dbe4..2025a8b811a 100644 --- a/support-files/rpm/server.cnf +++ b/support-files/rpm/server.cnf @@ -1,14 +1,17 @@ # -# These groups are read by MariaDB server +# These groups are read by MariaDB server. 
# Use it for options that only the server (but not clients) should see # # See the examples of server my.cnf files in /usr/share/mysql/ # -[mysqld] - +# this is read by the standalone daemon and embedded servers [server] +# this is only for the mysqld standalone daemon +[mysqld] + +# this is only for embedded server [embedded] # This group is only read by MariaDB-5.5 servers. From a247b12fe6d3356a2f9b115e3cd850bd004b4bd3 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 15 Jun 2012 17:22:49 +0200 Subject: [PATCH 39/39] MDEV-316 lp:1009085 Assertion failed: warn_item, file item_cmpfunc.cc, line 3613 make sure that find_date_time_item() is called before agg_arg_charsets_for_comparison(). optimize Item_func_conv_charset to avoid conversion if no string result is needed --- mysql-test/r/mdev316.result | 7 +++++++ mysql-test/t/mdev316.test | 6 ++++++ sql/item_cmpfunc.cc | 19 ++++++++++--------- sql/item_strfunc.h | 8 ++++++++ 4 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 mysql-test/r/mdev316.result create mode 100644 mysql-test/t/mdev316.test diff --git a/mysql-test/r/mdev316.result b/mysql-test/r/mdev316.result new file mode 100644 index 00000000000..a035f85f106 --- /dev/null +++ b/mysql-test/r/mdev316.result @@ -0,0 +1,7 @@ +set names swe7; +select '' in ('',convert(0,time)); +'' in ('',convert(0,time)) +1 +select case '' when '' then 1 when convert(0,time) then 2 end; +case '' when '' then 1 when convert(0,time) then 2 end +1 diff --git a/mysql-test/t/mdev316.test b/mysql-test/t/mdev316.test new file mode 100644 index 00000000000..376fd9ffc25 --- /dev/null +++ b/mysql-test/t/mdev316.test @@ -0,0 +1,6 @@ +# +# MDEV-316 lp:1009085 Assertion failed: warn_item, file item_cmpfunc.cc, line 3613 +# +set names swe7; +select '' in ('',convert(0,time)); +select case '' when '' then 1 when convert(0,time) then 2 end; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 5f1a863d8fd..e38fe057356 100644 --- a/sql/item_cmpfunc.cc +++ 
b/sql/item_cmpfunc.cc @@ -3032,6 +3032,11 @@ void Item_func_case::fix_length_and_dec() nagg++; if (!(found_types= collect_cmp_types(agg, nagg))) return; + + Item *date_arg= 0; + if (found_types & (1 << TIME_RESULT)) + date_arg= find_date_time_item(args, arg_count, 0); + if (found_types & (1 << STRING_RESULT)) { /* @@ -3071,16 +3076,12 @@ void Item_func_case::fix_length_and_dec() change_item_tree_if_needed(thd, &args[nagg * 2], agg[nagg + 1]); } - Item *date_arg= 0; for (i= 0; i <= (uint)TIME_RESULT; i++) { if (found_types & (1 << i) && !cmp_items[i]) { DBUG_ASSERT((Item_result)i != ROW_RESULT); - if ((Item_result)i == TIME_RESULT) - date_arg= find_date_time_item(args, arg_count, 0); - if (!(cmp_items[i]= cmp_item::get_comparator((Item_result)i, date_arg, cmp_collation.collation))) @@ -4051,15 +4052,15 @@ void Item_func_in::fix_length_and_dec() } else { + if (found_types & (1 << TIME_RESULT)) + date_arg= find_date_time_item(args, arg_count, 0); + if (found_types & (1 << STRING_RESULT) && + agg_arg_charsets_for_comparison(cmp_collation, args, arg_count)) + return; for (i= 0; i <= (uint) TIME_RESULT; i++) { if (found_types & (1 << i) && !cmp_items[i]) { - if ((Item_result)i == STRING_RESULT && - agg_arg_charsets_for_comparison(cmp_collation, args, arg_count)) - return; - if ((Item_result)i == TIME_RESULT) - date_arg= find_date_time_item(args, arg_count, 0); if (!cmp_items[i] && !(cmp_items[i]= cmp_item::get_comparator((Item_result)i, date_arg, cmp_collation.collation))) diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index ad854b02765..3361464814e 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -854,6 +854,14 @@ public: } } String *val_str(String *); + longlong val_int() + { return args[0]->val_int(); } + double val_real() + { return args[0]->val_real(); } + my_decimal *val_decimal(my_decimal *d) + { return args[0]->val_decimal(d); } + bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) + { return args[0]->get_date(ltime, fuzzydate); } void 
fix_length_and_dec(); const char *func_name() const { return "convert"; } virtual void print(String *str, enum_query_type query_type);