From deb3b9a17498d101468fc12a633245fc74730133 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Wed, 25 May 2011 16:01:56 -0700 Subject: [PATCH 001/359] Downported InnoDB support of Index Condition Pushdown from MySQL-5.6 code line. --- include/my_handler.h | 4 +- mysql-test/include/icp_tests.inc | 60 ++ mysql-test/r/index_merge_innodb.result | 2 +- mysql-test/r/innodb.result | 2 +- mysql-test/r/innodb_icp.result | 59 ++ mysql-test/r/maria_icp.result | 65 ++ mysql-test/r/myisam_icp.result | 65 ++ mysql-test/r/myisam_mrr.result | 2 +- mysql-test/r/null_key.result | 20 +- mysql-test/r/order_by.result | 2 +- .../r/range_vs_index_merge_innodb.result | 26 +- mysql-test/suite/innodb/r/innodb.result | 2 +- .../r/innodb_lock_wait_timeout_1.result | 4 +- mysql-test/suite/innodb/r/innodb_mysql.result | 4 +- .../r/innodb_lock_wait_timeout_1.result | 4 +- mysql-test/suite/pbxt/r/null_key.result | 20 +- sql/opt_index_cond_pushdown.cc | 65 +- storage/xtradb/dict/dict0dict.c | 13 +- storage/xtradb/handler/ha_innodb.cc | 598 +++++++++++------- storage/xtradb/handler/ha_innodb.h | 85 ++- storage/xtradb/include/dict0dict.h | 12 + storage/xtradb/include/dict0dict.ic | 14 + storage/xtradb/include/ha_prototypes.h | 9 + storage/xtradb/include/row0mysql.h | 29 +- storage/xtradb/row/row0sel.c | 594 +++++++++-------- 25 files changed, 1169 insertions(+), 591 deletions(-) diff --git a/include/my_handler.h b/include/my_handler.h index 5fc0565538d..f228ff6c794 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -135,6 +135,7 @@ extern void my_handler_error_unregister(void); if we're scanning "t.key BETWEEN 10 AND 20" and got a "t.key=21" tuple (the engine should stop scanning and return HA_ERR_END_OF_FILE right away). + 3=ICP_ABORTED_BY_USER -1= ICP_ERROR - Reserved for internal errors in engines. Should not be returned by index_cond_func_xxx @@ -144,7 +145,8 @@ typedef enum icp_result { ICP_ERROR=-1, ICP_NO_MATCH=0, ICP_MATCH=1, - ICP_OUT_OF_RANGE=2 + ICP_OUT_OF_RANGE=2, + ICP_ABORTED_BY_USER=3, } ICP_RESULT; diff --git a/mysql-test/include/icp_tests.inc b/mysql-test/include/icp_tests.inc index 52f30188455..52f89ec903a 100644 --- a/mysql-test/include/icp_tests.inc +++ b/mysql-test/include/icp_tests.inc @@ -225,3 +225,63 @@ SELECT COUNT(*) FROM t3; DROP PROCEDURE insert_data; DROP TABLE t1, t2, t3; + +--echo # +--echo # Bug#57372 "Multi-table updates and deletes fail when running with ICP +--echo # against InnoDB" +--echo # + +CREATE TABLE t1 ( + a INT KEY, + b INT +); + +CREATE TABLE t2 ( + a INT KEY, + b INT +); + +INSERT INTO t1 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105); +INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); + +UPDATE t1, t2 +SET t1.a = t1.a + 100, t2.b = t1.a + 10 +WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b - 100; + +--sorted_result +SELECT * FROM t1; +--sorted_result +SELECT * FROM t2; + +DROP TABLE t1, t2; + +--echo # +--echo # Bug#59259 "Incorrect rows returned for a correlated subquery +--echo # when ICP is on" +--echo # + +CREATE TABLE t1 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; + +INSERT INTO t1 VALUES (11,0); +INSERT INTO t1 VALUES (12,5); +INSERT INTO t1 VALUES (15,0); + +CREATE TABLE t2 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; + +INSERT INTO t2 VALUES (11,1); +INSERT INTO t2 VALUES (12,2); +INSERT INTO t2 VALUES (15,4); + +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='semijoin=off'; + +EXPLAIN +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); + +set optimizer_switch=@save_optimizer_switch; + +DROP TABLE t1, t2; + diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result index 17663076ca3..e57af584f8a 100644 --- a/mysql-test/r/index_merge_innodb.result +++ b/mysql-test/r/index_merge_innodb.result @@ -549,7 +549,7 @@ primary key (pk1, pk2) ); explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY,key1 PRIMARY 8 NULL 9 Using where +1 SIMPLE t1 range PRIMARY,key1 PRIMARY 8 NULL 9 Using index condition; Using where select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; pk1 pk2 key1 key2 pktail1ok pktail2ok pktail3bad pktail4bad pktail5bad pk2copy badkey filler1 filler2 1 10 0 0 0 0 0 0 0 10 0 filler-data-10 filler2 diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index ea0972a6a43..76db17c13f8 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -778,7 +778,7 @@ create table t1 (a int primary key,b int, c int, d int, e int, f int, g int, h insert into t1 values (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); explain select * from t1 where a > 0 and a < 50; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL # Using where +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL # Using index condition drop table t1; create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb; insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL'); diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index e4d81a9a3d5..c3643e62caa 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -200,4 +200,63 @@ COUNT(*) 12 DROP PROCEDURE insert_data; DROP TABLE t1, t2, t3; +# +# Bug#57372 "Multi-table updates and deletes fail when running with ICP +# against InnoDB" +# +CREATE TABLE t1 ( +a INT KEY, +b INT +); +CREATE TABLE t2 ( +a INT KEY, +b INT +); +INSERT INTO t1 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105); +INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +UPDATE t1, t2 +SET t1.a = t1.a + 100, t2.b = t1.a + 10 +WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b - 100; +SELECT * FROM t1; +a b +1 101 +102 102 +103 103 +104 104 +5 105 +SELECT * FROM t2; +a b +1 1 +2 12 +3 13 +4 14 +5 5 +DROP TABLE t1, t2; +# +# Bug#59259 "Incorrect rows returned for a correlated subquery +# when ICP is on" +# +CREATE TABLE t1 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +INSERT INTO t1 VALUES (11,0); +INSERT INTO t1 VALUES (12,5); +INSERT INTO t1 VALUES (15,0); +CREATE TABLE t2 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +INSERT INTO t2 VALUES (11,1); +INSERT INTO t2 VALUES (12,2); +INSERT INTO t2 VALUES (15,4); +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='semijoin=off'; +EXPLAIN +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 Using where +2 DEPENDENT SUBQUERY t2 index NULL PRIMARY 4 NULL 3 Using index +2 DEPENDENT SUBQUERY it eq_ref PRIMARY PRIMARY 4 func 1 Using index condition +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +pk i +12 5 +set optimizer_switch=@save_optimizer_switch; +DROP TABLE t1, t2; set storage_engine= @save_storage_engine; diff --git a/mysql-test/r/maria_icp.result b/mysql-test/r/maria_icp.result index 7db44c0028f..701dd5a4bd5 100644 --- a/mysql-test/r/maria_icp.result +++ b/mysql-test/r/maria_icp.result @@ -200,4 +200,69 @@ COUNT(*) 12 DROP PROCEDURE insert_data; DROP TABLE t1, t2, t3; +# +# Bug#57372 "Multi-table updates and deletes fail when running with ICP +# against InnoDB" +# +CREATE TABLE t1 ( +a INT KEY, +b INT +); +CREATE TABLE t2 ( +a INT KEY, +b INT +); +INSERT INTO t1 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105); +INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +UPDATE t1, t2 +SET t1.a = t1.a + 100, t2.b = t1.a + 10 +WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b - 100; +SELECT * FROM t1; +a b +1 101 +102 102 +103 103 +104 104 +5 105 +SELECT * FROM t2; +a b +1 1 +2 12 +3 13 +4 14 +5 5 +DROP TABLE t1, t2; +# +# Bug#59259 "Incorrect rows returned for a correlated subquery +# when ICP is on" +# +CREATE TABLE t1 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +Warnings: +Warning 1286 Unknown table engine 'InnoDB' +Warning 1266 Using storage engine Aria for table 't1' +INSERT INTO t1 VALUES (11,0); +INSERT INTO t1 VALUES (12,5); +INSERT INTO t1 VALUES (15,0); +CREATE TABLE t2 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +Warnings: +Warning 1286 Unknown table engine 'InnoDB' +Warning 1266 Using storage engine Aria for table 't2' +INSERT INTO t2 VALUES (11,1); +INSERT INTO t2 VALUES (12,2); +INSERT INTO t2 VALUES (15,4); +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='semijoin=off'; +EXPLAIN +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 Using where +2 DEPENDENT SUBQUERY t2 index NULL PRIMARY 4 NULL 3 Using index +2 DEPENDENT SUBQUERY it eq_ref PRIMARY PRIMARY 4 func 1 Using index condition +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +pk i +12 5 +set optimizer_switch=@save_optimizer_switch; +DROP TABLE t1, t2; set storage_engine= @save_storage_engine; diff --git a/mysql-test/r/myisam_icp.result b/mysql-test/r/myisam_icp.result index 45d45bd3452..742792ec772 100644 --- a/mysql-test/r/myisam_icp.result +++ b/mysql-test/r/myisam_icp.result @@ -198,4 +198,69 @@ COUNT(*) 12 DROP PROCEDURE insert_data; DROP TABLE t1, t2, t3; +# +# Bug#57372 "Multi-table updates and deletes fail when running with ICP +# against InnoDB" +# +CREATE TABLE t1 ( +a INT KEY, +b INT +); +CREATE TABLE t2 ( +a INT KEY, +b INT +); +INSERT INTO t1 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105); +INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +UPDATE t1, t2 +SET t1.a = t1.a + 100, t2.b = t1.a + 10 +WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b - 100; +SELECT * FROM t1; +a b +1 101 +102 102 +103 103 +104 104 +5 105 +SELECT * FROM t2; +a b +1 1 +2 12 +3 13 +4 14 +5 5 +DROP TABLE t1, t2; +# +# Bug#59259 "Incorrect rows returned for a correlated subquery +# when ICP is on" +# +CREATE TABLE t1 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +Warnings: +Warning 1286 Unknown table engine 'InnoDB' +Warning 1266 Using storage engine MyISAM for table 't1' +INSERT INTO t1 VALUES (11,0); +INSERT INTO t1 VALUES (12,5); +INSERT INTO t1 VALUES (15,0); +CREATE TABLE t2 (pk INTEGER PRIMARY KEY, i INTEGER NOT NULL) ENGINE=InnoDB; +Warnings: +Warning 1286 Unknown table engine 'InnoDB' +Warning 1266 Using storage engine MyISAM for table 't2' +INSERT INTO t2 VALUES (11,1); +INSERT INTO t2 VALUES (12,2); +INSERT INTO t2 VALUES (15,4); +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='semijoin=off'; +EXPLAIN +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 Using where +2 DEPENDENT SUBQUERY t2 index NULL PRIMARY 4 NULL 3 Using index +2 DEPENDENT SUBQUERY it eq_ref PRIMARY PRIMARY 4 func 1 Using index condition +SELECT * FROM t1 +WHERE pk IN (SELECT it.pk FROM t2 JOIN t2 AS it ON it.i=it.i WHERE it.pk-t1.i<10); +pk i +12 5 +set optimizer_switch=@save_optimizer_switch; +DROP TABLE t1, t2; drop table if exists t0, t1, t1i, t1m; diff --git a/mysql-test/r/myisam_mrr.result b/mysql-test/r/myisam_mrr.result index d0f0d12c28e..c7bc3c6229d 100644 --- a/mysql-test/r/myisam_mrr.result +++ b/mysql-test/r/myisam_mrr.result @@ -363,7 +363,7 @@ update t1 set b=repeat(char(65+a), 20) where a < 25; This must show range + using index condition: explain select * from t1 where a < 10 and b = repeat(char(65+a), 20); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 19 Using where +1 SIMPLE t1 range a a 5 NULL 19 Using index condition; Using where select * from t1 where a < 10 and b = repeat(char(65+a), 20); a b filler 0 AAAAAAAAAAAAAAAAAAAA filler diff --git a/mysql-test/r/null_key.result b/mysql-test/r/null_key.result index db2f662eeac..488110b88e3 100644 --- a/mysql-test/r/null_key.result +++ b/mysql-test/r/null_key.result @@ -76,13 +76,13 @@ insert into t2 select * from t1; alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); explain select * from t1 where a is null and b = 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b = 2 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b = 7 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a=2 and b = 2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ref a,b a 5 const 1 Using where @@ -91,25 +91,25 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 12 Using where explain select * from t1 where (a is null or a > 0 and a < 3) and b < 5 and c=0 limit 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a,b a 5 NULL 5 Using where +1 SIMPLE t1 range a,b a 5 NULL 5 Using index condition; Using where explain select * from t1 where (a is null or a = 7) and b=7 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref_or_null a,b a 5 const 4 Using where +1 SIMPLE t1 ref_or_null a,b a 5 const 4 Using index condition; Using where explain select * from t1 where (a is null and b>a) or a is null and b=7 limit 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b=9 or a is null and b=7 limit 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a > 1 and a < 3 limit 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where +1 SIMPLE t1 range a a 5 NULL 1 Using index condition explain select * from t1 where a is null and b=7 or a > 1 and a < 3 limit 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a,b a 5 NULL 4 Using where +1 SIMPLE t1 range a,b a 5 NULL 4 Using index condition; Using where explain select * from t1 where a > 8 and a < 9; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where +1 SIMPLE t1 range a a 5 NULL 1 Using index condition explain select * from t1 where b like "6%"; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range b b 12 NULL 1 Using where diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result index 24a1727b0b9..b0db439b570 100644 --- a/mysql-test/r/order_by.result +++ b/mysql-test/r/order_by.result @@ -614,7 +614,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range FieldKey FieldKey 38 NULL 4 Using index condition; Using MRR; Using filesort EXPLAIN SELECT * FROM t1 IGNORE INDEX (FieldKey, LongField) WHERE FieldKey > '2' ORDER BY LongVal; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range StringField StringField 38 NULL 4 Using where; Using filesort +1 SIMPLE t1 range StringField StringField 38 NULL 4 Using index condition; Using filesort SELECT * FROM t1 WHERE FieldKey > '2' ORDER BY LongVal; FieldKey LongVal StringVal 3 1 2 diff --git a/mysql-test/r/range_vs_index_merge_innodb.result b/mysql-test/r/range_vs_index_merge_innodb.result index 73cff1068a9..af16c3a6ac9 100644 --- a/mysql-test/r/range_vs_index_merge_innodb.result +++ b/mysql-test/r/range_vs_index_merge_innodb.result @@ -328,15 +328,15 @@ ID Name Country Population EXPLAIN SELECT * FROM City WHERE (ID < 10) OR (ID BETWEEN 100 AND 110); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 20 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 20 Using index condition EXPLAIN SELECT * FROM City WHERE (ID < 200) OR (ID BETWEEN 100 AND 200); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 199 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 199 Using index condition EXPLAIN SELECT * FROM City WHERE (ID < 600) OR (ID BETWEEN 900 AND 1500); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 2006 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 2006 Using index condition EXPLAIN SELECT * FROM City WHERE Country > 'A' AND Country < 'ARG'; id select_type table type possible_keys key key_len ref rows Extra @@ -355,7 +355,7 @@ WHERE ((ID < 10) AND (Name LIKE 'H%' OR (Country > 'A' AND Country < 'ARG'))) OR ((ID BETWEEN 100 AND 110) AND (Name LIKE 'P%' OR (Population > 103000 AND Population < 104000))); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY,Population,Country,Name PRIMARY 4 NULL 20 Using where +1 SIMPLE City range PRIMARY,Population,Country,Name PRIMARY 4 NULL 20 Using index condition; Using where EXPLAIN SELECT * FROM City WHERE ((ID < 800) AND (Name LIKE 'Ha%' OR (Country > 'A' AND Country < 'ARG'))) @@ -369,7 +369,7 @@ WHERE ((ID < 200) AND (Name LIKE 'Ha%' OR (Country > 'A' AND Country < 'ARG'))) OR ((ID BETWEEN 100 AND 200) AND (Name LIKE 'Pa%' OR (Population > 103000 AND Population < 104000))); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY,Population,Country,Name PRIMARY 4 NULL 199 Using where +1 SIMPLE City range PRIMARY,Population,Country,Name PRIMARY 4 NULL 199 Using index condition; Using where SELECT * FROM City USE INDEX () WHERE ((ID < 10) AND (Name LIKE 'H%' OR (Country > 'A' AND Country < 'ARG'))) OR ((ID BETWEEN 100 AND 110) AND @@ -601,11 +601,11 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3400 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 944 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 944 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3790 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using index condition EXPLAIN SELECT * FROM City WHERE Name LIKE 'P%'; id select_type table type possible_keys key key_len ref rows Extra @@ -765,27 +765,27 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3790 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 4025 AND 4035; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 4028 AND 4032; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 5 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 5 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3500 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 300 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 300 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 4000 AND 4300; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 80 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 80 Using index condition EXPLAIN SELECT * FROM City WHERE ID BETWEEN 250 and 260 ; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 11 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 102000); id select_type table type possible_keys key key_len ref rows Extra diff --git a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result index b8d02ecbc78..46b850ad37d 100644 --- a/mysql-test/suite/innodb/r/innodb.result +++ b/mysql-test/suite/innodb/r/innodb.result @@ -778,7 +778,7 @@ create table t1 (a int primary key,b int, c int, d int, e int, f int, g int, h insert into t1 values (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); explain select * from t1 where a > 0 and a < 50; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL # Using where +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL # Using index condition drop table t1; create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb; insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL'); diff --git a/mysql-test/suite/innodb/r/innodb_lock_wait_timeout_1.result b/mysql-test/suite/innodb/r/innodb_lock_wait_timeout_1.result index bd8760b8f79..e8ba522af2e 100644 --- a/mysql-test/suite/innodb/r/innodb_lock_wait_timeout_1.result +++ b/mysql-test/suite/innodb/r/innodb_lock_wait_timeout_1.result @@ -119,7 +119,7 @@ key PRIMARY key_len 4 ref t2.a rows 1 -Extra Using where +Extra Using index condition; Using where id 2 select_type DERIVED table NULL @@ -323,7 +323,7 @@ key PRIMARY key_len 4 ref t2.a rows 1 -Extra Using where +Extra Using index condition; Using where id 2 select_type DERIVED table NULL diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result index ed2f0254e4d..83bf287798a 100644 --- a/mysql-test/suite/innodb/r/innodb_mysql.result +++ b/mysql-test/suite/innodb/r/innodb_mysql.result @@ -2766,7 +2766,7 @@ WHERE t2.i = t1.pk AND t1.pk BETWEEN 0 AND 224 HAVING f > 7 ORDER BY f; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 3 Using where; Using filesort +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 3 Using index condition; Using where; Using filesort 1 SIMPLE t2 ref idx idx 5 test.t1.pk 1 Using index SELECT t1 .i AS f FROM t1, t2 WHERE t2.i = t1.pk AND t1.pk BETWEEN 0 AND 224 @@ -2798,7 +2798,7 @@ SELECT t1.a FROM t1 LEFT JOIN t2 ON t1.pk = t2.a WHERE t1.pk >= 6 HAVING t1.a<> 0 AND t1.a <> 11 ORDER BY t1.a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 3 Using where; Using filesort +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 3 Using index condition; Using where; Using filesort 1 SIMPLE t2 ref a a 5 test.t1.pk 1 Using index SELECT t1.a FROM t1 LEFT JOIN t2 ON t1.pk = t2.a WHERE t1.pk >= 6 HAVING t1.a<> 0 AND t1.a <> 11 diff --git a/mysql-test/suite/innodb_plugin/r/innodb_lock_wait_timeout_1.result b/mysql-test/suite/innodb_plugin/r/innodb_lock_wait_timeout_1.result index bd8760b8f79..e8ba522af2e 100644 --- a/mysql-test/suite/innodb_plugin/r/innodb_lock_wait_timeout_1.result +++ b/mysql-test/suite/innodb_plugin/r/innodb_lock_wait_timeout_1.result @@ -119,7 +119,7 @@ key PRIMARY key_len 4 ref t2.a rows 1 -Extra Using where +Extra Using index condition; Using where id 2 select_type DERIVED table NULL @@ -323,7 +323,7 @@ key PRIMARY key_len 4 ref t2.a rows 1 -Extra Using where +Extra Using index condition; Using where id 2 select_type DERIVED table NULL diff --git a/mysql-test/suite/pbxt/r/null_key.result b/mysql-test/suite/pbxt/r/null_key.result index f7b1a496e80..bef8c92419a 100644 --- a/mysql-test/suite/pbxt/r/null_key.result +++ b/mysql-test/suite/pbxt/r/null_key.result @@ -76,13 +76,13 @@ insert into t2 select * from t1; alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); explain select * from t1 where a is null and b = 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b = 2 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b = 7 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a=2 and b = 2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ref a,b a 5 const 1 Using where @@ -91,25 +91,25 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 12 Using where explain select * from t1 where (a is null or a > 0 and a < 3) and b < 5 and c=0 limit 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a,b a 5 NULL 5 Using where +1 SIMPLE t1 range a,b a 5 NULL 5 Using index condition; Using where explain select * from t1 where (a is null or a = 7) and b=7 and c=0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref_or_null a,b a 5 const 4 Using where +1 SIMPLE t1 ref_or_null a,b a 5 const 4 Using index condition; Using where explain select * from t1 where (a is null and b>a) or a is null and b=7 limit 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a is null and b=9 or a is null and b=7 limit 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a,b a 5 const 3 Using where +1 SIMPLE t1 ref a,b a 5 const 3 Using index condition; Using where explain select * from t1 where a > 1 and a < 3 limit 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where +1 SIMPLE t1 range a a 5 NULL 1 Using index condition explain select * from t1 where a is null and b=7 or a > 1 and a < 3 limit 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a,b a 5 NULL 4 Using where +1 SIMPLE t1 range a,b a 5 NULL 4 Using index condition; Using where explain select * from t1 where a > 8 and a < 9; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where +1 SIMPLE t1 range a a 5 NULL 1 Using index condition explain select * from t1 where b like "6%"; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range b b 12 NULL 1 Using where diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc index e0a2d3b1f30..639ceedf693 100644 --- a/sql/opt_index_cond_pushdown.cc +++ b/sql/opt_index_cond_pushdown.cc @@ -83,15 +83,44 @@ bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno, case Item::FIELD_ITEM: { Item_field *item_field= (Item_field*)item; - if (item_field->field->table != tbl) + Field *field= item_field->field; + if (field->table != tbl) return TRUE; /* The below is probably a repetition - the first part checks the other two, but let's play it safe: */ - return item_field->field->part_of_key.is_set(keyno) && - item_field->field->type() != MYSQL_TYPE_GEOMETRY && - item_field->field->type() != MYSQL_TYPE_BLOB; + if(!field->part_of_key.is_set(keyno) || + field->type() == MYSQL_TYPE_GEOMETRY || + field->type() == MYSQL_TYPE_BLOB) + return FALSE; + KEY *key_info= tbl->key_info + keyno; + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *key_part_end= key_part + key_info->key_parts; + for ( ; key_part < key_part_end; key_part++) + { + if (field->eq(key_part->field)) + return !(key_part->key_part_flag & HA_PART_KEY_SEG); + } + if ((tbl->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && + tbl->s->primary_key != MAX_KEY && + tbl->s->primary_key != keyno) + { + key_info= tbl->key_info + tbl->s->primary_key; + key_part= key_info->key_part; + key_part_end= key_part + key_info->key_parts; + for ( ; key_part < key_part_end; key_part++) + { + /* + It does not make sense to use the fact that the engine can read in + a full field if the key if the index is built only over a part + of this field. + */ + if (field->eq(key_part->field)) + return !(key_part->key_part_flag & HA_PART_KEY_SEG); + } + } + return FALSE; } case Item::REF_ITEM: return uses_index_fields_only(item->real_item(), tbl, keyno, @@ -288,30 +317,12 @@ void push_index_cond(JOIN_TAB *tab, uint keyno) { DBUG_ENTER("push_index_cond"); Item *idx_cond; - bool do_index_cond_pushdown= - ((tab->table->file->index_flags(keyno, 0, 1) & + + if ((tab->table->file->index_flags(keyno, 0, 1) & HA_DO_INDEX_COND_PUSHDOWN) && - optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN)); - - /* - Do not try index condition pushdown on indexes which have partially-covered - columns. Unpacking from a column prefix into index tuple is not a supported - operation in some engines, see e.g. MySQL BUG#42991. - TODO: a better solution would be not to consider partially-covered columns - as parts of the index and still produce/check index condition for - fully-covered index columns. - */ - KEY *key_info= tab->table->key_info + keyno; - for (uint kp= 0; kp < key_info->key_parts; kp++) - { - if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG)) - { - do_index_cond_pushdown= FALSE; - break; - } - } - - if (do_index_cond_pushdown) + optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN) && + tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI && + tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI) { DBUG_EXECUTE("where", print_where(tab->select_cond, "full cond", QT_ORDINARY);); diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 638ab55703b..fd563202290 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -472,10 +472,12 @@ Looks for column n in an index. ULINT_UNDEFINED if not contained */ UNIV_INTERN ulint -dict_index_get_nth_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix) /*!< in: TRUE=consider + column prefixes too */ { const dict_field_t* field; const dict_col_t* col; @@ -497,7 +499,8 @@ dict_index_get_nth_col_pos( for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); - if (col == field->col && field->prefix_len == 0) { + if (col == field->col + && (inc_prefix || field->prefix_len == 0)) { return(pos); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index a55cd92fc86..eb93981a70e 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -125,11 +125,6 @@ static pthread_cond_t commit_cond; static pthread_mutex_t commit_cond_m; static bool innodb_inited = 0; -C_MODE_START -static xtradb_icp_result_t index_cond_func_innodb(void *arg); -C_MODE_END - - #define INSIDE_HA_INNOBASE_CC @@ -1958,14 +1953,21 @@ trx_is_strict( /**************************************************************//** Resets some fields of a prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static void -reset_template( -/*===========*/ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ +inline +ha_innobase::reset_template(void) +/*=============================*/ { + ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); + ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; + /* Reset index condition pushdown state */ + prebuilt->idx_cond = NULL; + prebuilt->idx_cond_n_cols = 0; + pushed_idx_cond = NULL; + pushed_idx_cond_keyno = MAX_KEY; } /*****************************************************************//** @@ -2027,7 +2029,7 @@ ha_innobase::init_table_handle_for_HANDLER(void) we???? */ prebuilt->used_in_HANDLER = TRUE; - reset_template(prebuilt); + reset_template(); } /*********************************************************************//** @@ -3267,7 +3269,8 @@ ha_innobase::index_flags( const { return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER - | HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN); + | HA_READ_RANGE | HA_KEYREAD_ONLY + | HA_DO_INDEX_COND_PUSHDOWN); } /****************************************************************//** @@ -4093,8 +4096,8 @@ static inline uint get_field_offset( /*=============*/ - TABLE* table, /*!< in: MySQL table object */ - Field* field) /*!< in: MySQL field object */ + const TABLE* table, /*!< in: MySQL table object */ + const Field* field) /*!< in: MySQL field object */ { return((uint) (field->ptr - table->record[0])); } @@ -4647,45 +4650,170 @@ ha_innobase::store_key_val_for_row( DBUG_RETURN((uint)(buff - buff_start)); } +/**************************************************************//** +Determines if a field is needed in a prebuilt struct 'template'. +@return field to use, or NULL if the field is not needed */ +static +const Field* +build_template_needs_field( +/*=======================*/ + ibool index_contains, /*!< in: + dict_index_contains_col_or_prefix( + index, i) */ + ibool read_just_key, /*!< in: TRUE when MySQL calls + ha_innobase::extra with the + argument HA_EXTRA_KEYREAD; it is enough + to read just columns defined in + the index (i.e., no read of the + clustered index record necessary) */ + ibool fetch_all_in_key, + /*!< in: true=fetch all fields in + the index */ + ibool fetch_primary_key_cols, + /*!< in: true=fetch the + primary key columns */ + dict_index_t* index, /*!< in: InnoDB index to use */ + const TABLE* table, /*!< in: MySQL table object */ + ulint i) /*!< in: field index in InnoDB table */ +{ + const Field* field = table->field[i]; + + ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); + + if (!index_contains) { + if (read_just_key) { + /* If this is a 'key read', we do not need + columns that are not in the key */ + + return(NULL); + } + } else if (fetch_all_in_key) { + /* This field is needed in the query */ + + return(field); + } + + if (bitmap_is_set(table->read_set, i) + || bitmap_is_set(table->write_set, i)) { + /* This field is needed in the query */ + + return(field); + } + + if (fetch_primary_key_cols + && dict_table_col_in_clustered_key(index->table, i)) { + /* This field is needed in the query */ + + return(field); + } + + /* This field is not needed in the query, skip it */ + + return(NULL); +} + +/**************************************************************//** +Adds a field is to a prebuilt struct 'template'. +@return the field template */ +static +mysql_row_templ_t* +build_template_field( +/*=================*/ + row_prebuilt_t* prebuilt, /*!< in/out: template */ + dict_index_t* clust_index, /*!< in: InnoDB clustered index */ + dict_index_t* index, /*!< in: InnoDB index to use */ + TABLE* table, /*!< in: MySQL table object */ + const Field* field, /*!< in: field in MySQL table */ + ulint i) /*!< in: field index in InnoDB table */ +{ + mysql_row_templ_t* templ; + const dict_col_t* col; + + ut_ad(field == table->field[i]); + ut_ad(clust_index->table == index->table); + + col = dict_table_get_nth_col(index->table, i); + + templ = prebuilt->mysql_template + prebuilt->n_template++; + UNIV_MEM_INVALID(templ, sizeof *templ); + templ->col_no = i; + templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index); + ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); + + if (dict_index_is_clust(index)) { + templ->rec_field_no = templ->clust_rec_field_no; + } else { + templ->rec_field_no = dict_index_get_nth_col_pos(index, i); + } + + if (field->null_ptr) { + templ->mysql_null_byte_offset = + (ulint) ((char*) field->null_ptr + - (char*) table->record[0]); + + templ->mysql_null_bit_mask = (ulint) field->null_bit; + } else { + templ->mysql_null_bit_mask = 0; + } + + templ->mysql_col_offset = (ulint) get_field_offset(table, field); + + templ->mysql_col_len = (ulint) field->pack_length(); + templ->type = col->mtype; + templ->mysql_type = (ulint)field->type(); + + if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + templ->mysql_length_bytes = (ulint) + (((Field_varstring*)field)->length_bytes); + } + + templ->charset = dtype_get_charset_coll(col->prtype); + templ->mbminlen = col->mbminlen; + templ->mbmaxlen = col->mbmaxlen; + templ->is_unsigned = col->prtype & DATA_UNSIGNED; + + if (!dict_index_is_clust(index) + && templ->rec_field_no == ULINT_UNDEFINED) { + prebuilt->need_to_access_clustered = TRUE; + } + + if (prebuilt->mysql_prefix_len < templ->mysql_col_offset + + templ->mysql_col_len) { + prebuilt->mysql_prefix_len = templ->mysql_col_offset + + templ->mysql_col_len; + } + + if (templ->type == DATA_BLOB) { + prebuilt->templ_contains_blob = TRUE; + } + + return(templ); +} + /**************************************************************//** Builds a 'template' to the prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static +UNIV_INTERN void -build_template( -/*===========*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - THD* thd, /*!< in: current user thread, used - only if templ_type is - ROW_MYSQL_REC_FIELDS */ - TABLE* table, /* in: MySQL table */ - ha_innobase* file, /* in: ha_innobase handler */ - uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or - ROW_MYSQL_REC_FIELDS */ +ha_innobase::build_template( +/*========================*/ + bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW, + false=ROW_MYSQL_REC_FIELDS */ { dict_index_t* index; dict_index_t* clust_index; - mysql_row_templ_t* templ; - Field* field; - ulint n_fields, n_stored_fields; - ulint n_requested_fields = 0; + ulint n_fields; ibool fetch_all_in_key = FALSE; ibool fetch_primary_key_cols = FALSE; - ulint sql_idx, innodb_idx=0; - /* byte offset of the end of last requested column */ - ulint mysql_prefix_len = 0; - ibool do_idx_cond_push= FALSE; - ibool need_second_pass= FALSE; - + ulint i; + if (prebuilt->select_lock_type == LOCK_X) { /* We always retrieve the whole clustered index record if we use exclusive row level locks, for example, if the read is done in an UPDATE statement. */ - templ_type = ROW_MYSQL_WHOLE_ROW; - } - - if (templ_type == ROW_MYSQL_REC_FIELDS) { + whole_row = true; + } else if (!whole_row) { if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_ALL_COLS) { @@ -4702,7 +4830,7 @@ build_template( fetch_all_in_key = TRUE; } else { - templ_type = ROW_MYSQL_WHOLE_ROW; + whole_row = true; } } else if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_PRIMARY_KEY) { @@ -4719,182 +4847,206 @@ build_template( clust_index = dict_table_get_first_index(prebuilt->table); - if (templ_type == ROW_MYSQL_REC_FIELDS) { - index = prebuilt->index; - } else { - index = clust_index; - } + index = whole_row ? clust_index : prebuilt->index; - if (index == clust_index) { - prebuilt->need_to_access_clustered = TRUE; - } else { - prebuilt->need_to_access_clustered = FALSE; - /* Below we check column by column if we need to access - the clustered index */ - } + prebuilt->need_to_access_clustered = (index == clust_index); + + /* Below we check column by column if we need to access + the clustered index. */ n_fields = (ulint)table->s->fields; /* number of columns */ - n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ if (!prebuilt->mysql_template) { prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); + mem_alloc(n_fields * sizeof(mysql_row_templ_t)); } - prebuilt->template_type = templ_type; + prebuilt->template_type = whole_row + ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS; prebuilt->null_bitmap_len = table->s->null_bytes; + /* Prepare to build prebuilt->mysql_template[]. */ prebuilt->templ_contains_blob = FALSE; + prebuilt->mysql_prefix_len = 0; + prebuilt->n_template = 0; + prebuilt->idx_cond_n_cols = 0; - - /* - Setup index condition pushdown (note: we don't need to check if - this is a scan on primary key as that is checked in idx_cond_push) - */ - if (file->active_index == file->pushed_idx_cond_keyno && - file->active_index != MAX_KEY && - templ_type == ROW_MYSQL_REC_FIELDS) - do_idx_cond_push= need_second_pass= TRUE; + /* Note that in InnoDB, i is the column number in the table. + MySQL calls columns 'fields'. */ - /* Note that in InnoDB, i is the column number. MySQL calls columns - 'fields'. */ - for (sql_idx = 0; sql_idx < n_fields; sql_idx++) { - templ = prebuilt->mysql_template + n_requested_fields; - field = table->field[sql_idx]; - if (!field->stored_in_db) - goto skip_field; + if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { + /* Push down an index condition or an end_range check. */ + for (i = 0; i < n_fields; i++) { + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); - if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) { - /* Decide which columns we should fetch - and which we can skip. */ - register const ibool index_contains_field = - dict_index_contains_col_or_prefix(index, innodb_idx); - register const ibool index_covers_field = - field->part_of_key.is_set(file->active_index); + /* Test if an end_range or an index condition + refers to the field. Note that "index" and + "index_contains" may refer to the clustered index. + Index condition pushdown is relative to prebuilt->index + (the index that is being looked up first). */ - if (!index_contains_field && prebuilt->read_just_key) { - /* If this is a 'key read', we do not need - columns that are not in the key */ + /* When join_read_always_key() invokes this + code via handler::ha_index_init() and + ha_innobase::index_init(), end_range is not + yet initialized. Because of that, we must + always check for index_contains, instead of + the subset + field->part_of_key.is_set(active_index) + which would be acceptable if end_range==NULL. */ + if (index == prebuilt->index + ? index_contains + : dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Needed in ICP */ + const Field* field; + mysql_row_templ_t* templ; - goto skip_field; - } + if (whole_row) { + field = table->field[i]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i); + if (!field) { + continue; + } + } - if (index_contains_field && fetch_all_in_key) { - /* This field is needed in the query */ + templ = build_template_field( + prebuilt, clust_index, index, + table, field, i); + prebuilt->idx_cond_n_cols++; + ut_ad(prebuilt->idx_cond_n_cols + == prebuilt->n_template); - goto include_field; - } + if (index == prebuilt->index) { + templ->icp_rec_field_no + = templ->rec_field_no; + } else { + templ->icp_rec_field_no + = dict_index_get_nth_col_pos( + prebuilt->index, i); + } - if (bitmap_is_set(table->read_set, sql_idx) || - bitmap_is_set(table->write_set, sql_idx)) { - /* This field is needed in the query */ + if (dict_index_is_clust(prebuilt->index)) { + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); + /* If the primary key includes + a column prefix, use it in + index condition pushdown, + because the condition is + evaluated before fetching any + off-page (externally stored) + columns. */ + if (templ->icp_rec_field_no + < prebuilt->index->n_uniq) { + /* This is a key column; + all set. */ + continue; + } + } else if (templ->icp_rec_field_no + != ULINT_UNDEFINED) { + continue; + } - goto include_field; - } + /* This is a column prefix index. + The column prefix can be used in + an end_range comparison. */ - if (fetch_primary_key_cols - && dict_table_col_in_clustered_key( - index->table, innodb_idx)) { - /* This field is needed in the query */ + templ->icp_rec_field_no + = dict_index_get_nth_col_or_prefix_pos( + prebuilt->index, i, TRUE); + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); - goto include_field; - } - - /* This field is not needed in the query, skip it */ - - goto skip_field; -include_field: - if (do_idx_cond_push && - ((need_second_pass && !index_covers_field) || - (!need_second_pass && index_covers_field))) - goto skip_field; - } - n_requested_fields++; - - templ->col_no = innodb_idx; - templ->clust_rec_field_no = dict_col_get_clust_pos( - &index->table->cols[innodb_idx], clust_index); - ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED); - - if (index == clust_index) { - templ->rec_field_no = templ->clust_rec_field_no; - } else { - templ->rec_field_no = dict_index_get_nth_col_pos( - index, innodb_idx); - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; + /* Index condition pushdown can be used on + all columns of a secondary index, and on + the PRIMARY KEY columns. */ + /* TODO: enable this assertion + (but first ensure that end_range is + valid here and use an accurate condition + for end_range) + ut_ad(!dict_index_is_clust(prebuilt->index) + || templ->rec_field_no + < prebuilt->index->n_uniq); + */ } } - if (field->null_ptr) { - templ->mysql_null_byte_offset = - (ulint) ((char*) field->null_ptr - - (char*) table->record[0]); + ut_ad(prebuilt->idx_cond_n_cols > 0); + ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template); - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; + /* Include the fields that are not needed in index condition + pushdown. */ + for (i = 0; i < n_fields; i++) { + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); + + if (index == prebuilt->index + ? !index_contains + : !dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Not needed in ICP */ + const Field* field; + + if (whole_row) { + field = table->field[i]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i); + if (!field) { + continue; + } + } + + build_template_field(prebuilt, + clust_index, index, + table, field, i); + } } - templ->mysql_col_offset = (ulint) - get_field_offset(table, field); + prebuilt->idx_cond = this; + } else { + /* No index condition pushdown */ + prebuilt->idx_cond = NULL; - templ->mysql_col_len = (ulint) field->pack_length(); - if (mysql_prefix_len < templ->mysql_col_offset - + templ->mysql_col_len) { - mysql_prefix_len = templ->mysql_col_offset - + templ->mysql_col_len; + for (i = 0; i < n_fields; i++) { + const Field* field; + + if (whole_row) { + field = table->field[i]; + } else { + field = build_template_needs_field( + dict_index_contains_col_or_prefix( + index, i), + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i); + if (!field) { + continue; + } + } + + build_template_field(prebuilt, clust_index, index, + table, field, i); } - templ->type = index->table->cols[innodb_idx].mtype; - templ->mysql_type = (ulint)field->type(); - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - templ->mysql_length_bytes = (ulint) - (((Field_varstring*)field)->length_bytes); - } - - templ->charset = dtype_get_charset_coll( - index->table->cols[innodb_idx].prtype); - templ->mbminlen = index->table->cols[innodb_idx].mbminlen; - templ->mbmaxlen = index->table->cols[innodb_idx].mbmaxlen; - templ->is_unsigned = index->table->cols[innodb_idx].prtype - & DATA_UNSIGNED; - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; - } -skip_field: - if (need_second_pass && (sql_idx+1 == n_fields)) - { - prebuilt->n_index_fields= n_requested_fields; - need_second_pass= FALSE; - sql_idx= (~(ulint)0); /* to start from 0 */ - innodb_idx= (~(ulint)0); /* to start from 0 */ ///psergey-merge-merge-last-change - } - if (field->stored_in_db) { - innodb_idx++; - } - } - - prebuilt->n_template = n_requested_fields; - prebuilt->mysql_prefix_len = mysql_prefix_len; - - if (do_idx_cond_push) - { - prebuilt->idx_cond_func= index_cond_func_innodb; - prebuilt->idx_cond_func_arg= file; - } - else - { - prebuilt->idx_cond_func= NULL; - prebuilt->n_index_fields= n_requested_fields; } if (index != clust_index && prebuilt->need_to_access_clustered) { /* Change rec_field_no's to correspond to the clustered index record */ - for (ulint i = do_idx_cond_push? prebuilt->n_index_fields : 0; - i < n_requested_fields; i++) { - templ = prebuilt->mysql_template + i; + for (i = 0; i < prebuilt->n_template; i++) { + mysql_row_templ_t* templ + = &prebuilt->mysql_template[i]; templ->rec_field_no = templ->clust_rec_field_no; } } @@ -5157,7 +5309,7 @@ no_commit: /* Build the template used in converting quickly between the two database formats */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } innodb_srv_conc_enter_innodb(prebuilt->trx); @@ -5858,8 +6010,7 @@ ha_innobase::index_read( necessarily prebuilt->index, but can also be the clustered index */ if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, this, - ROW_MYSQL_REC_FIELDS); + build_template(false); } if (key_ptr) { @@ -6069,7 +6220,7 @@ ha_innobase::change_active_index( the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary copying. Starting from MySQL-4.1 we use a more efficient flag here. */ - build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS); + build_template(false); DBUG_RETURN(0); } @@ -8406,7 +8557,7 @@ ha_innobase::check( /* Build the template; we will use a dummy template in index scans done in checking */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } if (prebuilt->table->ibd_file_missing) { @@ -8898,12 +9049,7 @@ ha_innobase::extra( } break; case HA_EXTRA_RESET_STATE: - reset_template(prebuilt); - /* Reset index condition pushdown state */ - pushed_idx_cond= FALSE; - pushed_idx_cond_keyno= MAX_KEY; - prebuilt->idx_cond_func= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); break; case HA_EXTRA_NO_KEYREAD: prebuilt->read_just_key = 0; @@ -8949,14 +9095,8 @@ ha_innobase::reset() row_mysql_prebuilt_free_blob_heap(prebuilt); } - reset_template(prebuilt); - - /* Reset index condition pushdown state */ - pushed_idx_cond_keyno= MAX_KEY; - pushed_idx_cond= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); ds_mrr.dsmrr_close(); - prebuilt->idx_cond_func= NULL; /* TODO: This should really be reset in reset_template() but for now it's safer to do it explicitly here. */ @@ -9006,7 +9146,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -9125,7 +9265,7 @@ ha_innobase::external_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { @@ -9308,7 +9448,7 @@ ha_innobase::transactional_table_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { prebuilt->select_lock_type = LOCK_X; @@ -12153,39 +12293,47 @@ bool ha_innobase::is_thd_killed() * Index Condition Pushdown interface implementation */ -C_MODE_START - -/* - Index condition check function to be called from within Innobase. - See note on ICP_RESULT for return values description. -*/ - -static xtradb_icp_result_t index_cond_func_innodb(void *arg) +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +extern "C" UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ { - ha_innobase *h= (ha_innobase*)arg; + ha_innobase *h= (ha_innobase*) file; + if (h->is_thd_killed()) - return XTRADB_ICP_ABORTED_BY_USER; + return ICP_ABORTED_BY_USER; if (h->end_range) { if (h->compare_key2(h->end_range) > 0) - return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ + return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ } - return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH; + return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH; } -C_MODE_END - - -Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) +/** Attempt to push down an index condition. +* @param[in] keyno MySQL key number +* @param[in] idx_cond Index condition to be checked +* @return idx_cond if pushed; NULL if not pushed +*/ +UNIV_INTERN +class Item* +ha_innobase::idx_cond_push( + uint keyno, + class Item* idx_cond) { - if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X) - { - pushed_idx_cond_keyno= keyno_arg; - pushed_idx_cond= idx_cond_arg; - in_range_check_pushed_down= TRUE; - return NULL; /* Table handler will check the entire condition */ - } - return idx_cond_arg; /* Table handler will not make any checks */ + DBUG_ENTER("ha_innobase::idx_cond_push"); + DBUG_ASSERT(keyno != MAX_KEY); + DBUG_ASSERT(idx_cond != NULL); + + pushed_idx_cond = idx_cond; + pushed_idx_cond_keyno = keyno; + in_range_check_pushed_down = TRUE; + /* Table handler will check the entire condition */ + DBUG_RETURN(NULL); } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 28fccd8dd14..bcd7b5926e0 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -223,26 +223,79 @@ class ha_innobase: public handler bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); bool check_if_supported_virtual_columns(void) { return TRUE; } +private: + /** Builds a 'template' to the prebuilt struct. + + The template is used in fast retrieval of just those column + values MySQL needs in its processing. + @param whole_row true if access is needed to a whole row, + false if accessing individual fields is enough */ + void build_template(bool whole_row); + /** Resets a query execution 'template'. + @see build_template() */ + inline void reset_template(); + public: - /** - * Multi Range Read interface - */ - int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); - int multi_range_read_next(range_id_t *range_info); - ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, - uint n_ranges, uint *bufsz, + /** @name Multi Range Read interface @{ */ + /** Initialize multi range read @see DsMrr_impl::dsmrr_init + * @param seq + * @param seq_init_param + * @param n_ranges + * @param mode + * @param buf + */ + int multi_range_read_init(RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, uint mode, + HANDLER_BUFFER *buf); + /** Process next multi range read @see DsMrr_impl::dsmrr_next + * @param range_info + */ + int multi_range_read_next(range_id_t *range_info); + /** Initialize multi range read and get information. + * @see ha_myisam::multi_range_read_info_const + * @see DsMrr_impl::dsmrr_info_const + * @param keyno + * @param seq + * @param seq_init_param + * @param n_ranges + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *flags, COST_VECT *cost); + /** Initialize multi range read and get information. + * @see DsMrr_impl::dsmrr_info + * @param keyno + * @param n_ranges + * @param keys + * @param key_parts + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost); - ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint *bufsz, - uint *flags, COST_VECT *cost); - DsMrr_impl ds_mrr; - Item *idx_cond_push(uint keyno, Item* idx_cond); + /** Attempt to push down an index condition. + * @param[in] keyno MySQL key number + * @param[in] idx_cond Index condition to be checked + * @return idx_cond if pushed; NULL if not pushed + */ + class Item* idx_cond_push(uint keyno, class Item* idx_cond); - /* An helper function for index_cond_func_innodb: */ - bool is_thd_killed(); + /* An helper function for index_cond_func_innodb: */ + bool is_thd_killed(); + +private: + /** The multi range read session object */ + DsMrr_impl ds_mrr; + + /* @} */ }; /* Some accessor functions which the InnoDB plugin needs, but which diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 7baacdd6055..02a3cc5ea55 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -839,6 +839,18 @@ dict_index_get_nth_col_pos( const dict_index_t* index, /*!< in: index */ ulint n); /*!< in: column number */ /********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix); /*!< in: TRUE=consider + column prefixes too */ +/********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index bd7534dc7e2..02527d8edd2 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -656,6 +656,20 @@ dict_index_get_nth_col_no( return(dict_col_get_no(dict_index_get_nth_col(index, pos))); } +/********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INLINE +ulint +dict_index_get_nth_col_pos( +/*=======================*/ + const dict_index_t* index, /*!< in: index */ + ulint n) /*!< in: column number */ +{ + return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE)); +} + #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 445d94eeabb..db71c37afc3 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -247,6 +247,15 @@ innobase_get_at_most_n_mbchars( ulint data_len, /*!< in: length of the string in bytes */ const char* str); /*!< in: character string */ +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ + __attribute__((nonnull, warn_unused_result)); /******************************************************************//** Returns true if the thread supports XA, global value of innodb_supports_xa if thd is NULL. diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 88de6421cf9..76f1ee21414 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -539,6 +539,10 @@ struct mysql_row_templ_struct { Innobase record in the clustered index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + ulint icp_rec_field_no; /*!< field number of the column in an + Innobase record in the current index; + not defined unless + index condition pushdown is used */ ulint mysql_col_offset; /*!< offset of the column in the MySQL row format */ ulint mysql_col_len; /*!< length of the column in the MySQL @@ -577,16 +581,6 @@ struct mysql_row_templ_struct { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 - -typedef enum xtradb_icp_result { - XTRADB_ICP_ERROR=-1, - XTRADB_ICP_NO_MATCH=0, - XTRADB_ICP_MATCH=1, - XTRADB_ICP_OUT_OF_RANGE=2, - XTRADB_ICP_ABORTED_BY_USER=3, -} xtradb_icp_result_t; - -typedef xtradb_icp_result_t (*index_cond_func_t)(void *param); /** A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. */ @@ -784,16 +778,15 @@ struct row_prebuilt_struct { store it here so that we can return it to MySQL */ /*----------------------*/ + void* idx_cond; /*!< In ICP, pointer to a ha_innobase, + passed to innobase_index_cond(). + NULL if index condition pushdown is + not used. */ + ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols. + 0 if and only if idx_cond == NULL. */ + /*----------------------*/ ulint magic_n2; /*!< this should be the same as magic_n */ - /*----------------------*/ - index_cond_func_t idx_cond_func;/* Index Condition Pushdown function, - or NULL if there is none set */ - void* idx_cond_func_arg;/* ICP function argument */ - ulint n_index_fields; /* Number of fields at the start of - mysql_template. Valid only when using - ICP. */ - /*----------------------*/ }; #define ROW_PREBUILT_FETCH_MAGIC_N 465765687 diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 13dccdffb96..adb244ecee7 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -58,6 +58,8 @@ Created 12/19/1997 Heikki Tuuri #include "buf0lru.h" #include "ha_prototypes.h" +#include "my_handler.h" /* enum icp_result */ + /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2673,6 +2675,147 @@ row_sel_field_store_in_mysql_format( } } +/**************************************************************//** +Convert a field in the Innobase format to a field in the MySQL format. */ +static __attribute__((warn_unused_result)) +ibool +row_sel_store_mysql_field( +/*======================*/ + byte* mysql_rec, /*!< out: record in the + MySQL format */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ + const rec_t* rec, /*!< in: InnoDB record; + must be protected by + a page latch */ + const ulint* offsets, /*!< in: array returned by + rec_get_offsets() */ + ulint field_no, /*!< in: templ->rec_field_no or + templ->clust_rec_field_no */ + const mysql_row_templ_t*templ) /*!< in: row template */ +{ + const byte* data; + ulint len; + + ut_ad(prebuilt->default_rec); + ut_ad(templ); + ut_ad(templ >= prebuilt->mysql_template); + ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); + ut_ad(field_no == templ->clust_rec_field_no + || field_no == templ->rec_field_no + || field_no == templ->icp_rec_field_no); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { + + mem_heap_t* heap; + /* Copy an externally stored field to a temporary heap */ + + ut_a(!prebuilt->trx->has_search_latch); + ut_ad(field_no == templ->clust_rec_field_no); + + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } + + heap = prebuilt->blob_heap; + } else { + heap = mem_heap_create(UNIV_PAGE_SIZE); + } + + /* NOTE: if we are retrieving a big BLOB, we may + already run out of memory in the next call, which + causes an assert */ + + data = btr_rec_copy_externally_stored_field( + rec, offsets, + dict_table_zip_size(prebuilt->table), + field_no, &len, heap); + + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field was not written + yet. This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ + + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); + } + + ut_a(prebuilt->trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + return(FALSE); + } + + ut_a(len != UNIV_SQL_NULL); + + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); + + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); + } + } else { + /* Field is stored in the row. */ + + data = rec_get_nth_field(rec, offsets, field_no, &len); + + if (len == UNIV_SQL_NULL) { + /* MySQL assumes that the field for an SQL + NULL value is set to the default value. */ + ut_ad(templ->mysql_null_bit_mask); + + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); + mysql_rec[templ->mysql_null_byte_offset] + |= (byte) templ->mysql_null_bit_mask; + memcpy(mysql_rec + templ->mysql_col_offset, + (const byte*) prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); + return(TRUE); + } + + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + + /* It is a BLOB field locally stored in the + InnoDB record: we MUST copy its contents to + prebuilt->blob_heap here because + row_sel_field_store_in_mysql_format() stores a + pointer to the data, and the data passed to us + will be invalid as soon as the + mini-transaction is committed and the page + latch on the clustered index page is + released. */ + + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } + + data = mem_heap_dup(prebuilt->blob_heap, data, len); + } + + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); + } + + ut_ad(len != UNIV_SQL_NULL); + + if (templ->mysql_null_bit_mask) { + /* It is a nullable column with a non-NULL + value */ + mysql_rec[templ->mysql_null_byte_offset] + &= ~(byte) templ->mysql_null_bit_mask; + } + + return(TRUE); +} + /**************************************************************//** Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few @@ -2692,136 +2835,26 @@ row_sel_store_mysql_rec( ibool rec_clust, /*!< in: TRUE if rec is in the clustered index instead of prebuilt->index */ - const ulint* offsets, /* in: array returned by - rec_get_offsets() */ - ulint start_field_no, /* in: start from this field */ - ulint end_field_no) /* in: end at this field */ + const ulint* offsets) /*!< in: array returned by + rec_get_offsets(rec) */ { - mem_heap_t* extern_field_heap = NULL; - mem_heap_t* heap; ulint i; - ut_ad(prebuilt->mysql_template); - ut_ad(prebuilt->default_rec); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { mem_heap_free(prebuilt->blob_heap); prebuilt->blob_heap = NULL; } - for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) { + for (i = 0; i < prebuilt->n_template; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; - const mysql_row_templ_t*templ = prebuilt->mysql_template + i; - const byte* data; - ulint len; - ulint field_no; - - field_no = rec_clust - ? templ->clust_rec_field_no : templ->rec_field_no; - - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { - - /* Copy an externally stored field to the temporary - heap */ - - ut_a(!prebuilt->trx->has_search_latch); - - if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } - - heap = prebuilt->blob_heap; - } else { - extern_field_heap - = mem_heap_create(UNIV_PAGE_SIZE); - - heap = extern_field_heap; - } - - /* NOTE: if we are retrieving a big BLOB, we may - already run out of memory in the next call, which - causes an assert */ - - data = btr_rec_copy_externally_stored_field( - rec, offsets, - dict_table_zip_size(prebuilt->table), - field_no, &len, heap); - - if (UNIV_UNLIKELY(!data)) { - /* The externally stored field - was not written yet. This - record should only be seen by - recv_recovery_rollback_active() - or any TRX_ISO_READ_UNCOMMITTED - transactions. */ - - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - } - - return(FALSE); - } - - ut_a(len != UNIV_SQL_NULL); - } else { - /* Field is stored in the row. */ - - data = rec_get_nth_field(rec, offsets, field_no, &len); - - if (UNIV_UNLIKELY(templ->type == DATA_BLOB) - && len != UNIV_SQL_NULL) { - - /* It is a BLOB field locally stored in the - InnoDB record: we MUST copy its contents to - prebuilt->blob_heap here because later code - assumes all BLOB values have been copied to a - safe place. */ - - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } - - data = memcpy(mem_heap_alloc( - prebuilt->blob_heap, len), - data, len); - } - } - - if (len != UNIV_SQL_NULL) { - row_sel_field_store_in_mysql_format( - mysql_rec + templ->mysql_col_offset, - templ, data, len); - - /* Cleanup */ - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - extern_field_heap = NULL; - } - - if (templ->mysql_null_bit_mask) { - /* It is a nullable column with a non-NULL - value */ - mysql_rec[templ->mysql_null_byte_offset] - &= ~(byte) templ->mysql_null_bit_mask; - } - } else { - /* MySQL assumes that the field for an SQL - NULL value is set to the default value. */ - - UNIV_MEM_ASSERT_RW(prebuilt->default_rec - + templ->mysql_col_offset, - templ->mysql_col_len); - mysql_rec[templ->mysql_null_byte_offset] - |= (byte) templ->mysql_null_bit_mask; - memcpy(mysql_rec + templ->mysql_col_offset, - (const byte*) prebuilt->default_rec - + templ->mysql_col_offset, - templ->mysql_col_len); + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + rec_clust + ? templ->clust_rec_field_no + : templ->rec_field_no, + templ)) { + return(FALSE); } } @@ -3192,31 +3225,19 @@ UNIV_INLINE __attribute__((warn_unused_result)) ibool row_sel_push_cache_row_for_mysql( /*=============================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: record to push, in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const ulint* offsets, /* in: rec_get_offsets() */ - ulint start_field_no, /* in: start from this field */ - byte* remainder_buf) /* in: if start_field_no !=0, - where to take prev fields */ + byte* mysql_rec, /*!< in/out: MySQL record */ + row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ { - byte* buf; - ulint i; - ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); ut_a(!prebuilt->templ_contains_blob); - if (prebuilt->fetch_cache[0] == NULL) { + if (UNIV_UNLIKELY(prebuilt->fetch_cache[0] == NULL)) { + ulint i; /* Allocate memory for the fetch cache */ + ut_ad(prebuilt->n_fetch_cached == 0); for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { + byte* buf; /* A user has reported memory corruption in these buffers in Linux. Put magic numbers there to help @@ -3236,46 +3257,14 @@ row_sel_push_cache_row_for_mysql( UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], prebuilt->mysql_row_len); - if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( - prebuilt->fetch_cache[ - prebuilt->n_fetch_cached], - prebuilt, - rec, - rec_clust, - offsets, - start_field_no, - prebuilt->n_template))) { + memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + mysql_rec, prebuilt->mysql_row_len); + + if (++prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) { return(FALSE); } - if (start_field_no) { - - for (i=0; i < start_field_no; i++) { - register ulint offs; - mysql_row_templ_t* templ; - register byte * null_byte; - - templ = prebuilt->mysql_template + i; - - if (templ->mysql_null_bit_mask) { - offs = templ->mysql_null_byte_offset; - - null_byte= prebuilt->fetch_cache[ - prebuilt->n_fetch_cached]+offs; - (*null_byte)&= ~templ->mysql_null_bit_mask; - (*null_byte)|= (*(remainder_buf + offs) & - templ->mysql_null_bit_mask); - } - - offs = templ->mysql_col_offset; - memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached] - + offs, - remainder_buf + offs, - templ->mysql_col_len); - } - } - - prebuilt->n_fetch_cached++; + row_sel_pop_cached_row_for_mysql(mysql_rec, prebuilt); return(TRUE); } @@ -3353,6 +3342,81 @@ row_sel_try_search_shortcut_for_mysql( return(SEL_FOUND); } +/*********************************************************************//** +Check a pushed-down index condition. +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +static +enum icp_result +row_search_idx_cond_check( +/*======================*/ + byte* mysql_rec, /*!< out: record + in MySQL format (invalid unless + prebuilt->idx_cond!=NULL and + we return ICP_MATCH) */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct + for the table handle */ + const rec_t* rec, /*!< in: InnoDB record */ + const ulint* offsets) /*!< in: rec_get_offsets() */ +{ + enum icp_result result; + ulint i; + + ut_ad(rec_offs_validate(rec, prebuilt->index, offsets)); + + if (!prebuilt->idx_cond) { + return(ICP_MATCH); + } + + /* Convert to MySQL format those fields that are needed for + evaluating the index condition. */ + + if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { + mem_heap_empty(prebuilt->blob_heap); + } + + for (i = 0; i < prebuilt->idx_cond_n_cols; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; + + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + templ->icp_rec_field_no, + templ)) { + return(ICP_NO_MATCH); + } + } + + /* We assume that the index conditions on + case-insensitive columns are case-insensitive. The + case of such columns may be wrong in a secondary + index, if the case of the column has been updated in + the past, or a record has been deleted and a record + inserted in a different case. */ + result = innobase_index_cond(prebuilt->idx_cond); + switch (result) { + case ICP_MATCH: + /* Convert the remaining fields to MySQL format. + If this is a secondary index record, we must defer + this until we have fetched the clustered index record. */ + if (!prebuilt->need_to_access_clustered + || dict_index_is_clust(prebuilt->index)) { + if (!row_sel_store_mysql_rec(mysql_rec, prebuilt, + rec, + FALSE, offsets)) { + ut_ad(dict_index_is_clust(prebuilt->index)); + result = ICP_NO_MATCH; + } + } + /* fall through */ + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + return(result); + default: ; + } + + ut_error; +} + /********************************************************************//** Searches for rows in the database. This is used in the interface to MySQL. This function opens a cursor, and also implements fetch next @@ -3415,10 +3479,8 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ibool some_fields_in_buffer; ibool table_lock_waited = FALSE; ibool problematic_use = FALSE; - ibool get_clust_rec = 0; rec_offs_init(offsets_); @@ -3681,10 +3743,24 @@ row_search_for_mysql( mtr_commit(&mtr). */ ut_ad(!rec_get_deleted_flag(rec, comp)); + if (prebuilt->idx_cond) { + switch (row_search_idx_cond_check( + buf, prebuilt, + rec, offsets)) { + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + goto shortcut_mismatch; + case ICP_MATCH: + goto shortcut_match; + default: ; + } + ut_error; + } + if (!row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, - prebuilt->n_template)) { + offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -3695,13 +3771,12 @@ row_search_for_mysql( rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); /* Proceed as in case SEL_RETRY. */ break; } + shortcut_match: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3713,6 +3788,7 @@ row_search_for_mysql( goto release_search_latch_if_needed; case SEL_EXHAUSTED: + shortcut_mismatch: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3804,8 +3880,9 @@ retry_check: if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ - if (trx->read_view == NULL - && prebuilt->select_lock_type == LOCK_NONE) { + if (UNIV_UNLIKELY + (trx->read_view == NULL + && prebuilt->select_lock_type == LOCK_NONE)) { fputs("InnoDB: Error: MySQL is trying to" " perform a consistent read\n" @@ -4265,6 +4342,16 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); + mutex_exit(&kernel_mutex); + + if (old_vers == NULL) { + /* The row was not yet committed */ + + goto next_rec; + } + + did_semi_consistent_read = TRUE; + rec = old_vers; } else { mutex_exit(&kernel_mutex); @@ -4275,19 +4362,7 @@ no_gap_lock: offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - err = DB_SUCCESS; - break; } - mutex_exit(&kernel_mutex); - - if (old_vers == NULL) { - /* The row was not yet committed */ - - goto next_rec; - } - - did_semi_consistent_read = TRUE; - rec = old_vers; break; default: @@ -4346,8 +4421,27 @@ no_gap_lock: if (!lock_sec_rec_cons_read_sees( rec, trx->read_view)) { - get_clust_rec = TRUE; - goto idx_cond_check; + /* We should look at the clustered index. + However, as this is a non-locking read, + we can skip the clustered index lookup if + the condition does not match the secondary + index entry. */ + switch (row_search_idx_cond_check( + buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + goto next_rec; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_MATCH: + goto requires_clust_rec; + default: ; + } + + ut_error; } } } @@ -4392,38 +4486,31 @@ no_gap_lock: goto next_rec; } - -idx_cond_check: - if (prebuilt->idx_cond_func) { - int res; - ibool ib_res; - ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, prebuilt->n_index_fields); - /* - The above call will fail and return FALSE when requested to - store an "externally stored column" (afaiu, a blob). Index - Condition Pushdown is not supported for indexes with blob - columns, so we should never get this error. - */ - ut_ad(ib_res); - res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg); - if (res == XTRADB_ICP_NO_MATCH) - goto next_rec; - else if (res != XTRADB_ICP_MATCH) { - err= (res == XTRADB_ICP_ABORTED_BY_USER ? - DB_SEARCH_ABORTED_BY_USER : - DB_RECORD_NOT_FOUND); - goto idx_cond_failed; - } - /* res == XTRADB_ICP_MATCH */ - } + /* Check if the record matches the index condition. */ + switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + if (did_semi_consistent_read) { + row_unlock_for_mysql(prebuilt, TRUE); + } + goto next_rec; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_MATCH: + break; + default: + ut_error; + } /* Get the clustered index record if needed, if we did not do the search using the clustered index. */ - if (get_clust_rec || (index != clust_index - && prebuilt->need_to_access_clustered)) { + if (index != clust_index && prebuilt->need_to_access_clustered) { + +requires_clust_rec: + ut_ad(index != clust_index); /* We use a 'goto' to the preceding label if a consistent read of a secondary index record requires us to look up old @@ -4487,6 +4574,19 @@ idx_cond_check: result_rec = clust_rec; ut_ad(rec_offs_validate(result_rec, clust_index, offsets)); + + if (prebuilt->idx_cond) { + /* Convert the remaining fields to + MySQL format. We were unable to do + this in row_search_idx_cond_check(), + because the condition is on the + secondary index and the requested + column is in the clustered index. */ + if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, + TRUE, offsets)) { + goto next_rec; + } + } } else { result_rec = rec; } @@ -4520,15 +4620,10 @@ idx_cond_check: are BLOBs in the fields to be fetched. In HANDLER we do not cache rows because there the cursor is a scrollable cursor. */ - some_fields_in_buffer = (index != clust_index - && prebuilt->idx_cond_func); - if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - result_rec != rec, - offsets, - some_fields_in_buffer? - prebuilt->n_index_fields : 0, - buf)) { + if (!prebuilt->idx_cond + && !row_sel_store_mysql_rec(buf, prebuilt, result_rec, + result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do not exist. Such records may only be @@ -4536,14 +4631,10 @@ idx_cond_check: level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); - } else if (prebuilt->n_fetch_cached - == MYSQL_FETCH_CACHE_SIZE) { - - goto got_row; + goto next_rec; + } else if (row_sel_push_cache_row_for_mysql(buf, prebuilt)) { + goto next_rec; } - - goto next_rec; } else { if (UNIV_UNLIKELY (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) { @@ -4564,15 +4655,11 @@ idx_cond_check: rec_offs_size(offsets)); mach_write_to_4(buf, rec_offs_extra_size(offsets) + 4); - } else { - /* Returning a row to MySQL */ - - if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, - result_rec != rec, - offsets, - prebuilt->idx_cond_func? - prebuilt->n_index_fields: 0, - prebuilt->n_template)) { + } else if (!prebuilt->idx_cond) { + /* The record was not yet converted to MySQL format. */ + if (!row_sel_store_mysql_rec( + buf, prebuilt, + result_rec, result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do @@ -4581,8 +4668,6 @@ idx_cond_check: isolation level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); goto next_rec; } } @@ -4600,7 +4685,6 @@ idx_cond_check: /* From this point on, 'offsets' are invalid. */ -got_row: /* We have an optimization to save CPU time: if this is a consistent read on a unique condition on the clustered index, then we do not store the pcur position, because any fetch next or prev will anyway @@ -4624,7 +4708,6 @@ idx_cond_failed: next_rec: /* Reset the old and new "did semi-consistent read" flags. */ - get_clust_rec = FALSE; if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; @@ -4635,6 +4718,7 @@ next_rec: /*-------------------------------------------------------------*/ /* PHASE 5: Move the cursor to the next index record */ + /*TODO: with ICP, do this when switching pages, every N pages */ if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the From d86ad1861a57558e63dc68bf372eff6bdb22cb6d Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Fri, 27 May 2011 15:20:19 -0700 Subject: [PATCH 002/359] Backported the test case for bug 43617 fixed by the patch for bug 42580. Backported the test case for bug 49906 fixed by the patch for LP bug 625841. Slightly optimized the code of the fix for LP bug 625841. --- mysql-test/include/icp_tests.inc | 73 ++++++++++++++++++++++++++++++++ mysql-test/r/innodb_icp.result | 69 ++++++++++++++++++++++++++++++ mysql-test/r/innodb_mrr.result | 1 + mysql-test/r/maria_icp.result | 69 ++++++++++++++++++++++++++++++ mysql-test/r/myisam_icp.result | 69 ++++++++++++++++++++++++++++++ mysql-test/t/innodb_mrr.test | 1 + sql/filesort.cc | 11 +++-- 7 files changed, 287 insertions(+), 6 deletions(-) diff --git a/mysql-test/include/icp_tests.inc b/mysql-test/include/icp_tests.inc index 52f89ec903a..af52e7f30d4 100644 --- a/mysql-test/include/icp_tests.inc +++ b/mysql-test/include/icp_tests.inc @@ -86,6 +86,78 @@ SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c1,c2 LIMIT 2; --echo DROP TABLE t1; +--echo # +--echo # Bug#43617 - Innodb returns wrong results with timestamp's range value +--echo # in IN clause +--echo # (Note: Fixed by patch for BUG#42580) +--echo # + +CREATE TABLE t1( + c1 TIMESTAMP NOT NULL, + c2 TIMESTAMP NULL, + c3 DATE, + c4 DATETIME, + PRIMARY KEY(c1), + UNIQUE INDEX(c2) +); + +INSERT INTO t1 VALUES + ('0000-00-00 00:00:00','0000-00-00 00:00:00','2008-01-04','2008-01-05 00:00:00'), + ('1971-01-01 00:00:01','1980-01-01 00:00:01','2009-01-01','2009-01-02 00:00:00'), + ('1999-01-01 00:00:00','1999-01-01 00:00:00', NULL, NULL), + ('2007-05-23 09:15:28','2007-05-23 09:15:28','2007-05-24','2007-05-24 09:15:28'), + ('2007-05-27 00:00:00','2007-05-25 00:00:00','2007-05-26','2007-05-26 00:00:00'), + ('2008-01-01 00:00:00', NULL, '2008-01-02','2008-01-03 00:00:00'), + ('2009-01-29 11:11:27','2009-01-29 11:11:27','2009-01-29','2009-01-29 11:11:27'), + ('2038-01-09 03:14:07','2038-01-09 03:14:07','2009-01-05','2009-01-06 00:00:00'); + +--echo +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2; + +--echo +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 LIMIT 2; + +--echo +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC; + +--echo +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC LIMIT 2; + +--echo +DROP TABLE t1; + +--echo # +--echo # BUG#49906: Assertion failed - Field_varstring::val_str in field.cc +--echo # (Note: Fixed by patch for LP BUG#625841) +--echo # + +CREATE TABLE t1 ( + f1 VARCHAR(1024), + f2 VARCHAR(10), + INDEX test_idx USING BTREE (f2,f1(5)) +); + +INSERT INTO t1 VALUES ('a','c'), ('b','d'); + +SELECT f1 +FROM t1 +WHERE f2 LIKE 'd' +ORDER BY f1; + +DROP TABLE t1; + --echo # --echo # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on --echo # @@ -102,6 +174,7 @@ SELECT * FROM t WHERE a > 2 FOR UPDATE; DROP TABLE t; + --echo # --echo # Bug#35080 - Innodb crash at mem_block_get_len line 72 --echo # diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index c3643e62caa..729ad81fa25 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -79,6 +79,75 @@ c1 c2 c3 c4 2008-01-01 NULL 2008-01-02 00:00:00 2008-01-03 00:00:00 2008-01-17 NULL NULL 2009-01-29 00:00:00 +DROP TABLE t1; +# +# Bug#43617 - Innodb returns wrong results with timestamp's range value +# in IN clause +# (Note: Fixed by patch for BUG#42580) +# +CREATE TABLE t1( +c1 TIMESTAMP NOT NULL, +c2 TIMESTAMP NULL, +c3 DATE, +c4 DATETIME, +PRIMARY KEY(c1), +UNIQUE INDEX(c2) +); +INSERT INTO t1 VALUES +('0000-00-00 00:00:00','0000-00-00 00:00:00','2008-01-04','2008-01-05 00:00:00'), +('1971-01-01 00:00:01','1980-01-01 00:00:01','2009-01-01','2009-01-02 00:00:00'), +('1999-01-01 00:00:00','1999-01-01 00:00:00', NULL, NULL), +('2007-05-23 09:15:28','2007-05-23 09:15:28','2007-05-24','2007-05-24 09:15:28'), +('2007-05-27 00:00:00','2007-05-25 00:00:00','2007-05-26','2007-05-26 00:00:00'), +('2008-01-01 00:00:00', NULL, '2008-01-02','2008-01-03 00:00:00'), +('2009-01-29 11:11:27','2009-01-29 11:11:27','2009-01-29','2009-01-29 11:11:27'), +('2038-01-09 03:14:07','2038-01-09 03:14:07','2009-01-05','2009-01-06 00:00:00'); + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +DROP TABLE t1; +# +# BUG#49906: Assertion failed - Field_varstring::val_str in field.cc +# (Note: Fixed by patch for LP BUG#625841) +# +CREATE TABLE t1 ( +f1 VARCHAR(1024), +f2 VARCHAR(10), +INDEX test_idx USING BTREE (f2,f1(5)) +); +INSERT INTO t1 VALUES ('a','c'), ('b','d'); +SELECT f1 +FROM t1 +WHERE f2 LIKE 'd' +ORDER BY f1; +f1 +b DROP TABLE t1; # # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on diff --git a/mysql-test/r/innodb_mrr.result b/mysql-test/r/innodb_mrr.result index fd61460bd79..d244157b61b 100644 --- a/mysql-test/r/innodb_mrr.result +++ b/mysql-test/r/innodb_mrr.result @@ -726,6 +726,7 @@ JA USA DROP TABLE t1,t2; # # Testcase backport: Bug#43249 +# (Note: Fixed by patch for BUG#42580) # CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=innodb; INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); diff --git a/mysql-test/r/maria_icp.result b/mysql-test/r/maria_icp.result index 701dd5a4bd5..14a450d13ca 100644 --- a/mysql-test/r/maria_icp.result +++ b/mysql-test/r/maria_icp.result @@ -79,6 +79,75 @@ c1 c2 c3 c4 2008-01-01 NULL 2008-01-02 00:00:00 2008-01-03 00:00:00 2008-01-17 NULL NULL 2009-01-29 00:00:00 +DROP TABLE t1; +# +# Bug#43617 - Innodb returns wrong results with timestamp's range value +# in IN clause +# (Note: Fixed by patch for BUG#42580) +# +CREATE TABLE t1( +c1 TIMESTAMP NOT NULL, +c2 TIMESTAMP NULL, +c3 DATE, +c4 DATETIME, +PRIMARY KEY(c1), +UNIQUE INDEX(c2) +); +INSERT INTO t1 VALUES +('0000-00-00 00:00:00','0000-00-00 00:00:00','2008-01-04','2008-01-05 00:00:00'), +('1971-01-01 00:00:01','1980-01-01 00:00:01','2009-01-01','2009-01-02 00:00:00'), +('1999-01-01 00:00:00','1999-01-01 00:00:00', NULL, NULL), +('2007-05-23 09:15:28','2007-05-23 09:15:28','2007-05-24','2007-05-24 09:15:28'), +('2007-05-27 00:00:00','2007-05-25 00:00:00','2007-05-26','2007-05-26 00:00:00'), +('2008-01-01 00:00:00', NULL, '2008-01-02','2008-01-03 00:00:00'), +('2009-01-29 11:11:27','2009-01-29 11:11:27','2009-01-29','2009-01-29 11:11:27'), +('2038-01-09 03:14:07','2038-01-09 03:14:07','2009-01-05','2009-01-06 00:00:00'); + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +DROP TABLE t1; +# +# BUG#49906: Assertion failed - Field_varstring::val_str in field.cc +# (Note: Fixed by patch for LP BUG#625841) +# +CREATE TABLE t1 ( +f1 VARCHAR(1024), +f2 VARCHAR(10), +INDEX test_idx USING BTREE (f2,f1(5)) +); +INSERT INTO t1 VALUES ('a','c'), ('b','d'); +SELECT f1 +FROM t1 +WHERE f2 LIKE 'd' +ORDER BY f1; +f1 +b DROP TABLE t1; # # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on diff --git a/mysql-test/r/myisam_icp.result b/mysql-test/r/myisam_icp.result index 742792ec772..41f0d411be4 100644 --- a/mysql-test/r/myisam_icp.result +++ b/mysql-test/r/myisam_icp.result @@ -77,6 +77,75 @@ c1 c2 c3 c4 2008-01-01 NULL 2008-01-02 00:00:00 2008-01-03 00:00:00 2008-01-17 NULL NULL 2009-01-29 00:00:00 +DROP TABLE t1; +# +# Bug#43617 - Innodb returns wrong results with timestamp's range value +# in IN clause +# (Note: Fixed by patch for BUG#42580) +# +CREATE TABLE t1( +c1 TIMESTAMP NOT NULL, +c2 TIMESTAMP NULL, +c3 DATE, +c4 DATETIME, +PRIMARY KEY(c1), +UNIQUE INDEX(c2) +); +INSERT INTO t1 VALUES +('0000-00-00 00:00:00','0000-00-00 00:00:00','2008-01-04','2008-01-05 00:00:00'), +('1971-01-01 00:00:01','1980-01-01 00:00:01','2009-01-01','2009-01-02 00:00:00'), +('1999-01-01 00:00:00','1999-01-01 00:00:00', NULL, NULL), +('2007-05-23 09:15:28','2007-05-23 09:15:28','2007-05-24','2007-05-24 09:15:28'), +('2007-05-27 00:00:00','2007-05-25 00:00:00','2007-05-26','2007-05-26 00:00:00'), +('2008-01-01 00:00:00', NULL, '2008-01-02','2008-01-03 00:00:00'), +('2009-01-29 11:11:27','2009-01-29 11:11:27','2009-01-29','2009-01-29 11:11:27'), +('2038-01-09 03:14:07','2038-01-09 03:14:07','2009-01-05','2009-01-06 00:00:00'); + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +SELECT * +FROM t1 +WHERE c2 IN ('1971-01-01 00:00:01','2038-01-09 03:14:07') +ORDER BY c2 DESC LIMIT 2; +c1 c2 c3 c4 +2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 + +DROP TABLE t1; +# +# BUG#49906: Assertion failed - Field_varstring::val_str in field.cc +# (Note: Fixed by patch for LP BUG#625841) +# +CREATE TABLE t1 ( +f1 VARCHAR(1024), +f2 VARCHAR(10), +INDEX test_idx USING BTREE (f2,f1(5)) +); +INSERT INTO t1 VALUES ('a','c'), ('b','d'); +SELECT f1 +FROM t1 +WHERE f2 LIKE 'd' +ORDER BY f1; +f1 +b DROP TABLE t1; # # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on diff --git a/mysql-test/t/innodb_mrr.test b/mysql-test/t/innodb_mrr.test index 7a3dc9f668e..bb0e9c10d39 100644 --- a/mysql-test/t/innodb_mrr.test +++ b/mysql-test/t/innodb_mrr.test @@ -417,6 +417,7 @@ DROP TABLE t1,t2; --echo # --echo # Testcase backport: Bug#43249 +--echo # (Note: Fixed by patch for BUG#42580) --echo # CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=innodb; INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); diff --git a/sql/filesort.cc b/sql/filesort.cc index 6e3bf27afcc..b9cda236b0c 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -547,12 +547,11 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, /* Temporary set for register_used_fields and register_field_in_read_map */ sort_form->read_set= &sort_form->tmp_set; register_used_fields(param); - if (select && select->cond) - select->cond->walk(&Item::register_field_in_read_map, 1, - (uchar*) sort_form); - if (select && select->pre_idx_push_select_cond) - select->pre_idx_push_select_cond->walk(&Item::register_field_in_read_map, - 1, (uchar*) sort_form); + Item *sort_cond= !select ? + 0 : !select->pre_idx_push_select_cond ? + select->cond : select->pre_idx_push_select_cond; + if (sort_cond) + sort_cond->walk(&Item::register_field_in_read_map, 1, (uchar*) sort_form); sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set, &sort_form->tmp_set); From 42cc59a8c138e9416326b3c4b4f8d64f0bd48728 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Fri, 27 May 2011 17:04:29 -0700 Subject: [PATCH 003/359] Backported the test case for bug 43618 from mysql code line. --- mysql-test/include/icp_tests.inc | 41 ++++++++++++++++++++++++++++++++ mysql-test/r/innodb_icp.result | 36 ++++++++++++++++++++++++++++ mysql-test/r/maria_icp.result | 36 ++++++++++++++++++++++++++++ mysql-test/r/myisam_icp.result | 36 ++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+) diff --git a/mysql-test/include/icp_tests.inc b/mysql-test/include/icp_tests.inc index af52e7f30d4..4b28faa8e41 100644 --- a/mysql-test/include/icp_tests.inc +++ b/mysql-test/include/icp_tests.inc @@ -138,6 +138,47 @@ ORDER BY c2 DESC LIMIT 2; --echo DROP TABLE t1; +--echo # +--echo # BUG#43618: MyISAM&Maria returns wrong results with 'between' +--echo # on timestamp +--echo # + +CREATE TABLE t1( + ts TIMESTAMP NOT NULL, + c char NULL, + PRIMARY KEY(ts) +); + +INSERT INTO t1 VALUES + ('1971-01-01','a'), + ('2007-05-25','b'), + ('2008-01-01','c'), + ('2038-01-09','d'); + +--disable_warnings + +--echo +--echo # Execute select with invalid timestamp, desc ordering +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; + +--echo +--echo # Should use index condition +EXPLAIN +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +--echo + +--enable_warnings + +DROP TABLE t1; + --echo # --echo # BUG#49906: Assertion failed - Field_varstring::val_str in field.cc --echo # (Note: Fixed by patch for LP BUG#625841) diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index 729ad81fa25..42d53f5cfde 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -131,6 +131,42 @@ ORDER BY c2 DESC LIMIT 2; c1 c2 c3 c4 2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 +DROP TABLE t1; +# +# BUG#43618: MyISAM&Maria returns wrong results with 'between' +# on timestamp +# +CREATE TABLE t1( +ts TIMESTAMP NOT NULL, +c char NULL, +PRIMARY KEY(ts) +); +INSERT INTO t1 VALUES +('1971-01-01','a'), +('2007-05-25','b'), +('2008-01-01','c'), +('2038-01-09','d'); + +# Execute select with invalid timestamp, desc ordering +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +ts c +2008-01-01 00:00:00 c +2007-05-25 00:00:00 b + +# Should use index condition +EXPLAIN +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 2 Using index condition + DROP TABLE t1; # # BUG#49906: Assertion failed - Field_varstring::val_str in field.cc diff --git a/mysql-test/r/maria_icp.result b/mysql-test/r/maria_icp.result index 14a450d13ca..39edaf67029 100644 --- a/mysql-test/r/maria_icp.result +++ b/mysql-test/r/maria_icp.result @@ -131,6 +131,42 @@ ORDER BY c2 DESC LIMIT 2; c1 c2 c3 c4 2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 +DROP TABLE t1; +# +# BUG#43618: MyISAM&Maria returns wrong results with 'between' +# on timestamp +# +CREATE TABLE t1( +ts TIMESTAMP NOT NULL, +c char NULL, +PRIMARY KEY(ts) +); +INSERT INTO t1 VALUES +('1971-01-01','a'), +('2007-05-25','b'), +('2008-01-01','c'), +('2038-01-09','d'); + +# Execute select with invalid timestamp, desc ordering +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +ts c +2008-01-01 00:00:00 c +2007-05-25 00:00:00 b + +# Should use index condition +EXPLAIN +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 4 Using index condition + DROP TABLE t1; # # BUG#49906: Assertion failed - Field_varstring::val_str in field.cc diff --git a/mysql-test/r/myisam_icp.result b/mysql-test/r/myisam_icp.result index 41f0d411be4..68f18da3f50 100644 --- a/mysql-test/r/myisam_icp.result +++ b/mysql-test/r/myisam_icp.result @@ -129,6 +129,42 @@ ORDER BY c2 DESC LIMIT 2; c1 c2 c3 c4 2038-01-09 03:14:07 2038-01-09 03:14:07 2009-01-05 2009-01-06 00:00:00 +DROP TABLE t1; +# +# BUG#43618: MyISAM&Maria returns wrong results with 'between' +# on timestamp +# +CREATE TABLE t1( +ts TIMESTAMP NOT NULL, +c char NULL, +PRIMARY KEY(ts) +); +INSERT INTO t1 VALUES +('1971-01-01','a'), +('2007-05-25','b'), +('2008-01-01','c'), +('2038-01-09','d'); + +# Execute select with invalid timestamp, desc ordering +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +ts c +2008-01-01 00:00:00 c +2007-05-25 00:00:00 b + +# Should use index condition +EXPLAIN +SELECT * +FROM t1 +WHERE ts BETWEEN '0000-00-00' AND '2010-00-01 00:00:00' +ORDER BY ts DESC +LIMIT 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 4 Using index condition + DROP TABLE t1; # # BUG#49906: Assertion failed - Field_varstring::val_str in field.cc From 061060eac0351ee35267bae085cc637686e21d6c Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Fri, 27 May 2011 20:50:06 -0700 Subject: [PATCH 004/359] Backported the test case for bug 52660 from mysql code line. Extended the test case to show how MariaDB applies ICP for indexes with some components defined on the beginning of fields. --- mysql-test/include/icp_tests.inc | 33 +++++++++++++++++++++++++++ mysql-test/r/innodb_icp.result | 39 ++++++++++++++++++++++++++++++++ mysql-test/r/maria_icp.result | 39 ++++++++++++++++++++++++++++++++ mysql-test/r/myisam_icp.result | 39 ++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+) diff --git a/mysql-test/include/icp_tests.inc b/mysql-test/include/icp_tests.inc index 4b28faa8e41..ebf0b7620fe 100644 --- a/mysql-test/include/icp_tests.inc +++ b/mysql-test/include/icp_tests.inc @@ -199,6 +199,39 @@ ORDER BY f1; DROP TABLE t1; +--echo # +--echo # Bug#52660 - "Perf. regr. using ICP for MyISAM on range queries on +--echo # an index containing TEXT" +--echo # + +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +CREATE TABLE t2 (a INT); +INSERT INTO t2 SELECT A.a + 10*(B.a) FROM t1 A, t1 B; + +CREATE TABLE t3 ( + c1 TINYTEXT NOT NULL, + i1 INT NOT NULL, + KEY (c1(6),i1) +); + +INSERT INTO t3 SELECT CONCAT('c-',1000+t2.a,'=w'), 1 FROM t2; + +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; + +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; + +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; + +DROP TABLE t1, t2, t3; + --echo # --echo # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on --echo # diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index 42d53f5cfde..8937a9761ff 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -186,6 +186,45 @@ f1 b DROP TABLE t1; # +# Bug#52660 - "Perf. regr. using ICP for MyISAM on range queries on +# an index containing TEXT" +# +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2 (a INT); +INSERT INTO t2 SELECT A.a + 10*(B.a) FROM t1 A, t1 B; +CREATE TABLE t3 ( +c1 TINYTEXT NOT NULL, +i1 INT NOT NULL, +KEY (c1(6),i1) +); +INSERT INTO t3 SELECT CONCAT('c-',1000+t2.a,'=w'), 1 FROM t2; +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 8 NULL 3 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +c1 +c-1004=w +c-1005=w +c-1006=w +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 12 NULL 2 Using index condition; Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +c1 +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 ALL c1 NULL NULL NULL 100 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +c1 +c-1004=w +c-1005=w +c-1006=w +DROP TABLE t1, t2, t3; +# # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on # CREATE TABLE t ( diff --git a/mysql-test/r/maria_icp.result b/mysql-test/r/maria_icp.result index 39edaf67029..ff99f99d79c 100644 --- a/mysql-test/r/maria_icp.result +++ b/mysql-test/r/maria_icp.result @@ -186,6 +186,45 @@ f1 b DROP TABLE t1; # +# Bug#52660 - "Perf. regr. using ICP for MyISAM on range queries on +# an index containing TEXT" +# +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2 (a INT); +INSERT INTO t2 SELECT A.a + 10*(B.a) FROM t1 A, t1 B; +CREATE TABLE t3 ( +c1 TINYTEXT NOT NULL, +i1 INT NOT NULL, +KEY (c1(6),i1) +); +INSERT INTO t3 SELECT CONCAT('c-',1000+t2.a,'=w'), 1 FROM t2; +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 8 NULL 3 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +c1 +c-1004=w +c-1005=w +c-1006=w +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 12 NULL 2 Using index condition; Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +c1 +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 ALL c1 NULL NULL NULL 100 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +c1 +c-1004=w +c-1005=w +c-1006=w +DROP TABLE t1, t2, t3; +# # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on # CREATE TABLE t ( diff --git a/mysql-test/r/myisam_icp.result b/mysql-test/r/myisam_icp.result index 68f18da3f50..eefa4f795a1 100644 --- a/mysql-test/r/myisam_icp.result +++ b/mysql-test/r/myisam_icp.result @@ -184,6 +184,45 @@ f1 b DROP TABLE t1; # +# Bug#52660 - "Perf. regr. using ICP for MyISAM on range queries on +# an index containing TEXT" +# +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2 (a INT); +INSERT INTO t2 SELECT A.a + 10*(B.a) FROM t1 A, t1 B; +CREATE TABLE t3 ( +c1 TINYTEXT NOT NULL, +i1 INT NOT NULL, +KEY (c1(6),i1) +); +INSERT INTO t3 SELECT CONCAT('c-',1000+t2.a,'=w'), 1 FROM t2; +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 8 NULL 3 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w'; +c1 +c-1004=w +c-1005=w +c-1006=w +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range c1 c1 12 NULL 2 Using index condition; Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' and i1 > 2; +c1 +EXPLAIN +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 ALL c1 NULL NULL NULL 100 Using where +SELECT c1 FROM t3 WHERE c1 >= 'c-1004=w' and c1 <= 'c-1006=w' or i1 > 2; +c1 +c-1004=w +c-1005=w +c-1006=w +DROP TABLE t1, t2, t3; +# # Bug#40992 - InnoDB: Crash when engine_condition_pushdown is on # CREATE TABLE t ( From 37f3a801f30548cbcb636de8116a7fd5954dcd9c Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sun, 29 May 2011 11:09:05 -0700 Subject: [PATCH 005/359] Backported the test case for bug 52605. --- mysql-test/include/icp_tests.inc | 35 ++++++++++++++++++++++++++++++++ mysql-test/r/innodb_icp.result | 33 ++++++++++++++++++++++++++++++ mysql-test/r/maria_icp.result | 33 ++++++++++++++++++++++++++++++ mysql-test/r/myisam_icp.result | 33 ++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+) diff --git a/mysql-test/include/icp_tests.inc b/mysql-test/include/icp_tests.inc index ebf0b7620fe..45283de1a88 100644 --- a/mysql-test/include/icp_tests.inc +++ b/mysql-test/include/icp_tests.inc @@ -402,6 +402,41 @@ SELECT * FROM t2; DROP TABLE t1, t2; +--echo # +--echo # Bug#52605 - "Adding LIMIT 1 clause to query with complex range +--echo # predicate causes wrong results" +--echo # + +CREATE TABLE t1 ( + pk INT NOT NULL, + c1 INT, + PRIMARY KEY (pk), + KEY k1 (c1) +); + +INSERT INTO t1 VALUES (1,NULL); +INSERT INTO t1 VALUES (2,6); +INSERT INTO t1 VALUES (3,NULL); +INSERT INTO t1 VALUES (4,6); +INSERT INTO t1 VALUES (5,NULL); +INSERT INTO t1 VALUES (6,NULL); +INSERT INTO t1 VALUES (7,9); +INSERT INTO t1 VALUES (8,0); + +SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; + +EXPLAIN SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; + +DROP TABLE t1; + --echo # --echo # Bug#59259 "Incorrect rows returned for a correlated subquery --echo # when ICP is on" diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index 8937a9761ff..f604deb71c4 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -377,6 +377,39 @@ a b 5 5 DROP TABLE t1, t2; # +# Bug#52605 - "Adding LIMIT 1 clause to query with complex range +# predicate causes wrong results" +# +CREATE TABLE t1 ( +pk INT NOT NULL, +c1 INT, +PRIMARY KEY (pk), +KEY k1 (c1) +); +INSERT INTO t1 VALUES (1,NULL); +INSERT INTO t1 VALUES (2,6); +INSERT INTO t1 VALUES (3,NULL); +INSERT INTO t1 VALUES (4,6); +INSERT INTO t1 VALUES (5,NULL); +INSERT INTO t1 VALUES (6,NULL); +INSERT INTO t1 VALUES (7,9); +INSERT INTO t1 VALUES (8,0); +SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +pk c1 +4 6 +EXPLAIN SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY,k1 k1 5 NULL 3 Using where; Using index +DROP TABLE t1; +# # Bug#59259 "Incorrect rows returned for a correlated subquery # when ICP is on" # diff --git a/mysql-test/r/maria_icp.result b/mysql-test/r/maria_icp.result index ff99f99d79c..3bd51d959c0 100644 --- a/mysql-test/r/maria_icp.result +++ b/mysql-test/r/maria_icp.result @@ -377,6 +377,39 @@ a b 5 5 DROP TABLE t1, t2; # +# Bug#52605 - "Adding LIMIT 1 clause to query with complex range +# predicate causes wrong results" +# +CREATE TABLE t1 ( +pk INT NOT NULL, +c1 INT, +PRIMARY KEY (pk), +KEY k1 (c1) +); +INSERT INTO t1 VALUES (1,NULL); +INSERT INTO t1 VALUES (2,6); +INSERT INTO t1 VALUES (3,NULL); +INSERT INTO t1 VALUES (4,6); +INSERT INTO t1 VALUES (5,NULL); +INSERT INTO t1 VALUES (6,NULL); +INSERT INTO t1 VALUES (7,9); +INSERT INTO t1 VALUES (8,0); +SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +pk c1 +4 6 +EXPLAIN SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY,k1 k1 5 NULL 4 Using where +DROP TABLE t1; +# # Bug#59259 "Incorrect rows returned for a correlated subquery # when ICP is on" # diff --git a/mysql-test/r/myisam_icp.result b/mysql-test/r/myisam_icp.result index eefa4f795a1..2efea16aa0b 100644 --- a/mysql-test/r/myisam_icp.result +++ b/mysql-test/r/myisam_icp.result @@ -375,6 +375,39 @@ a b 5 5 DROP TABLE t1, t2; # +# Bug#52605 - "Adding LIMIT 1 clause to query with complex range +# predicate causes wrong results" +# +CREATE TABLE t1 ( +pk INT NOT NULL, +c1 INT, +PRIMARY KEY (pk), +KEY k1 (c1) +); +INSERT INTO t1 VALUES (1,NULL); +INSERT INTO t1 VALUES (2,6); +INSERT INTO t1 VALUES (3,NULL); +INSERT INTO t1 VALUES (4,6); +INSERT INTO t1 VALUES (5,NULL); +INSERT INTO t1 VALUES (6,NULL); +INSERT INTO t1 VALUES (7,9); +INSERT INTO t1 VALUES (8,0); +SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +pk c1 +4 6 +EXPLAIN SELECT pk, c1 +FROM t1 +WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 +ORDER BY c1 +LIMIT 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range PRIMARY,k1 k1 5 NULL 4 Using where +DROP TABLE t1; +# # Bug#59259 "Incorrect rows returned for a correlated subquery # when ICP is on" # From 9a1e54658ebd073978f36bd535754768641f70a5 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Wed, 1 Jun 2011 17:41:50 -0700 Subject: [PATCH 006/359] Modified the code backported from mysql 5.6 to make it handle virtual columns as well. --- .../suite/vcol/r/vcol_select_innodb.result | 6 +-- storage/xtradb/handler/ha_innodb.cc | 48 ++++++++++++------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/mysql-test/suite/vcol/r/vcol_select_innodb.result b/mysql-test/suite/vcol/r/vcol_select_innodb.result index 68a9b2a7a63..70414e91d4f 100644 --- a/mysql-test/suite/vcol/r/vcol_select_innodb.result +++ b/mysql-test/suite/vcol/r/vcol_select_innodb.result @@ -149,7 +149,7 @@ a b c 2 -2 -2 explain select * from t3 where a between 1 and 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using where +1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using index condition # SELECT * FROM tbl_name WHERE select * from t3 where b between -2 and -1; a b c @@ -173,7 +173,7 @@ a b c 1 -1 -1 explain select * from t3 where a between 1 and 2 order by b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using where; Using filesort +1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using index condition; Using filesort # SELECT * FROM tbl_name WHERE ORDER BY select * from t3 where a between 1 and 2 order by c; a b c @@ -181,7 +181,7 @@ a b c 1 -1 -1 explain select * from t3 where a between 1 and 2 order by c; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using where; Using filesort +1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 1 Using index condition; Using where; Using filesort # SELECT * FROM tbl_name WHERE ORDER BY select * from t3 where b between -2 and -1 order by a; a b c diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index eb93981a70e..9d9b02a9af5 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -4674,9 +4674,10 @@ build_template_needs_field( primary key columns */ dict_index_t* index, /*!< in: InnoDB index to use */ const TABLE* table, /*!< in: MySQL table object */ - ulint i) /*!< in: field index in InnoDB table */ + ulint i, /*!< in: field index in InnoDB table */ + ulint sql_idx) /*!< in: field index in SQL table */ { - const Field* field = table->field[i]; + const Field* field = table->field[sql_idx]; ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); @@ -4693,8 +4694,8 @@ build_template_needs_field( return(field); } - if (bitmap_is_set(table->read_set, i) - || bitmap_is_set(table->write_set, i)) { + if (bitmap_is_set(table->read_set, sql_idx) + || bitmap_is_set(table->write_set, sql_idx)) { /* This field is needed in the query */ return(field); @@ -4802,10 +4803,10 @@ ha_innobase::build_template( { dict_index_t* index; dict_index_t* clust_index; - ulint n_fields; + ulint n_fields, n_stored_fields; ibool fetch_all_in_key = FALSE; ibool fetch_primary_key_cols = FALSE; - ulint i; + ulint i, sql_idx; if (prebuilt->select_lock_type == LOCK_X) { /* We always retrieve the whole clustered index record if we @@ -4855,10 +4856,11 @@ ha_innobase::build_template( the clustered index. */ n_fields = (ulint)table->s->fields; /* number of columns */ + n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ if (!prebuilt->mysql_template) { prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc(n_fields * sizeof(mysql_row_templ_t)); + mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); } prebuilt->template_type = whole_row @@ -4876,7 +4878,12 @@ ha_innobase::build_template( if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { /* Push down an index condition or an end_range check. */ - for (i = 0; i < n_fields; i++) { + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + const ibool index_contains = dict_index_contains_col_or_prefix(index, i); @@ -4903,14 +4910,14 @@ ha_innobase::build_template( mysql_row_templ_t* templ; if (whole_row) { - field = table->field[i]; + field = table->field[sql_idx]; } else { field = build_template_needs_field( index_contains, prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i); + index, table, i, sql_idx); if (!field) { continue; } @@ -4982,7 +4989,12 @@ ha_innobase::build_template( /* Include the fields that are not needed in index condition pushdown. */ - for (i = 0; i < n_fields; i++) { + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + const ibool index_contains = dict_index_contains_col_or_prefix(index, i); @@ -4994,14 +5006,14 @@ ha_innobase::build_template( const Field* field; if (whole_row) { - field = table->field[i]; + field = table->field[sql_idx]; } else { field = build_template_needs_field( index_contains, prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i); + index, table, i, sql_idx); if (!field) { continue; } @@ -5018,11 +5030,15 @@ ha_innobase::build_template( /* No index condition pushdown */ prebuilt->idx_cond = NULL; - for (i = 0; i < n_fields; i++) { + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { const Field* field; + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + if (whole_row) { - field = table->field[i]; + field = table->field[sql_idx]; } else { field = build_template_needs_field( dict_index_contains_col_or_prefix( @@ -5030,7 +5046,7 @@ ha_innobase::build_template( prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i); + index, table, i, sql_idx); if (!field) { continue; } From adc1f2f4c939c15ad5efd37633332560456cd4fd Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Thu, 2 Jun 2011 14:03:02 -0700 Subject: [PATCH 007/359] Applied the patch for bug 58837 (for the mysql-5.6 code line). This patch fixed the crash in innodb_bug59307. --- sql/handler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/handler.h b/sql/handler.h index 4707aabbd52..96cea68e1fb 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1716,6 +1716,7 @@ public: DBUG_ENTER("ha_rnd_init"); DBUG_ASSERT(inited==NONE || (inited==RND && scan)); inited= (result= rnd_init(scan)) ? NONE: RND; + end_range= NULL; DBUG_RETURN(result); } int ha_rnd_end() @@ -1723,6 +1724,7 @@ public: DBUG_ENTER("ha_rnd_end"); DBUG_ASSERT(inited==RND); inited=NONE; + end_range= NULL; DBUG_RETURN(rnd_end()); } int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result)); From 233fd79232b580b6f25f02d447b8de32c0daca71 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sat, 4 Jun 2011 20:06:01 +0200 Subject: [PATCH 008/359] improve Innodb locking primitives on Windows (MySQL Bug#52102, and fix OS_FILE_LIMIT - on Windows it is about 16 millions --- include/config-win.h | 2 +- storage/innodb_plugin/CMakeLists.txt | 4 +- storage/innodb_plugin/include/os0file.h | 4 + storage/innodb_plugin/include/os0sync.h | 54 +- storage/innodb_plugin/include/os0sync.ic | 9 +- storage/innodb_plugin/include/srv0srv.h | 4 +- storage/innodb_plugin/include/sync0sync.h | 2 +- storage/innodb_plugin/os/os0file.c | 195 ++------ storage/innodb_plugin/os/os0sync.c | 497 +++++++++++------- storage/innodb_plugin/srv/srv0srv.c | 14 + storage/innodb_plugin/srv/srv0start.c | 26 +- storage/xtradb/CMakeLists.txt | 15 +- storage/xtradb/include/os0file.h | 4 + storage/xtradb/include/os0sync.h | 71 ++- storage/xtradb/include/os0sync.ic | 9 +- storage/xtradb/include/srv0srv.h | 4 +- storage/xtradb/include/sync0sync.h | 2 +- storage/xtradb/os/os0file.c | 196 ++------ storage/xtradb/os/os0sync.c | 583 +++++++++++++++------- storage/xtradb/srv/srv0srv.c | 14 + storage/xtradb/srv/srv0start.c | 45 +- 21 files changed, 955 insertions(+), 799 deletions(-) diff --git a/include/config-win.h b/include/config-win.h index 6d12bb0e33f..e538cf66268 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d) #define FN_DEVCHAR ':' #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */ -#define OS_FILE_LIMIT 2048 +#define OS_FILE_LIMIT 16*1024*1024 #define DO_NOT_REMOVE_THREAD_WRAPPERS #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) diff --git a/storage/innodb_plugin/CMakeLists.txt b/storage/innodb_plugin/CMakeLists.txt index 6dfac4e52cb..36e4a0ebb36 100644 --- a/storage/innodb_plugin/CMakeLists.txt +++ b/storage/innodb_plugin/CMakeLists.txt @@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) -# Windows atomics do not perform well. Disable Windows atomics by default. -# See bug#52102 for details. -#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION) + ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) MYSQL_STORAGE_ENGINE(INNODB_PLUGIN) diff --git a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h index d645cae38bb..297d59f2b98 100644 --- a/storage/innodb_plugin/include/os0file.h +++ b/storage/innodb_plugin/include/os0file.h @@ -182,6 +182,10 @@ log. */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ +#define OS_WINXP 5 /*!< Microsoft Windows XP */ +#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */ +#define OS_WIN7 7 /*!< Microsoft Windows 7 */ + extern ulint os_n_file_reads; extern ulint os_n_file_writes; diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h index f32e7ab710a..d29c231c2f0 100644 --- a/storage/innodb_plugin/include/os0sync.h +++ b/storage/innodb_plugin/include/os0sync.h @@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" -#ifdef __WIN__ - +#ifdef _WIN32 +/** Native event (slow)*/ +typedef HANDLE os_native_event_t; /** Native mutex */ -#define os_fast_mutex_t CRITICAL_SECTION - -/** Native event */ -typedef HANDLE os_native_event_t; - -/** Operating system event */ -typedef struct os_event_struct os_event_struct_t; -/** Operating system event handle */ -typedef os_event_struct_t* os_event_t; - -/** An asynchronous signal sent between threads */ -struct os_event_struct { - os_native_event_t handle; - /*!< Windows event */ - UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /*!< list of all created events */ -}; +typedef CRITICAL_SECTION os_fast_mutex_t; +/** Native condition variable */ +typedef CONDITION_VARIABLE os_cond_t; #else /** Native mutex */ typedef pthread_mutex_t os_fast_mutex_t; +/** Native condition variable */ +typedef pthread_cond_t os_cond_t; +#endif /** Operating system event */ typedef struct os_event_struct os_event_struct_t; @@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; /** An asynchronous signal sent between threads */ struct os_event_struct { +#ifdef _WIN32 + HANDLE handle; /*!< kernel event object, slow, used on older Windows */ +#endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ ibool is_set; /*!< this is TRUE when the event is @@ -76,12 +69,14 @@ struct os_event_struct { this event */ ib_int64_t signal_count; /*!< this is incremented each time the event becomes signaled */ - pthread_cond_t cond_var; /*!< condition variable is used in + os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ UT_LIST_NODE_T(os_event_struct_t) os_event_list; /*!< list of all created events */ }; -#endif + + + /** Operating system mutex */ typedef struct os_mutex_struct os_mutex_str_t; @@ -198,21 +193,6 @@ os_event_wait_time( os_event_t event, /*!< in: event to wait */ ulint time); /*!< in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array); - /*!< in: pointer to an array of event - handles */ -#endif /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. @@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) -#elif defined(HAVE_WINDOWS_ATOMICS) +#elif defined(_WIN32) #define HAVE_ATOMIC_BUILTINS diff --git a/storage/innodb_plugin/include/os0sync.ic b/storage/innodb_plugin/include/os0sync.ic index 1f3ce38fa65..2c6c1dbe629 100644 --- a/storage/innodb_plugin/include/os0sync.ic +++ b/storage/innodb_plugin/include/os0sync.ic @@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri #endif /**********************************************************//** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! +Acquires ownership of a fast mutex. @return 0 if success, != 0 if was reserved by another thread */ UNIV_INLINE ulint @@ -38,9 +37,9 @@ os_fast_mutex_trylock( os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ { #ifdef __WIN__ - EnterCriticalSection(fast_mutex); - - return(0); + if (TryEnterCriticalSection(fast_mutex)) + return 0; + return(1); #else /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock so that it returns 0 on success. In the operating system diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 91ae895040c..f8330988b77 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup; on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; #endif /* !UNIV_HOTBACKUP */ - +#ifdef __WIN__ +extern ibool srv_use_native_conditions; +#endif extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h index 71c9920a10b..e246e256586 100644 --- a/storage/innodb_plugin/include/sync0sync.h +++ b/storage/innodb_plugin/include/sync0sync.h @@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri extern my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS +#ifdef _WIN32 typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates on LONG variable */ #else diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c index ff80f7ed1b4..59885f20566 100644 --- a/storage/innodb_plugin/os/os0file.c +++ b/storage/innodb_plugin/os/os0file.c @@ -121,7 +121,7 @@ struct os_aio_slot_struct{ which pending aio operation was completed */ #ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the + HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ OVERLAPPED control; /*!< Windows control block for the aio request */ @@ -155,7 +155,7 @@ struct os_aio_array_struct{ aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ #ifdef __WIN__ - os_native_event_t* native_events; + HANDLE* handles; /*!< Pointer to an array of OS native event handles where we copied the handles from slots, in the same @@ -229,10 +229,16 @@ os_get_os_version(void) } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { return(OS_WIN95); } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - if (os_info.dwMajorVersion <= 4) { - return(OS_WINNT); - } else { - return(OS_WIN2000); + switch(os_info.dwMajorVersion){ + case 3: + case 4: + return OS_WINNT; + case 5: + return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP; + case 6: + return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7; + default: + return OS_WIN7; } } else { ut_error; @@ -2272,13 +2278,12 @@ os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2293,40 +2298,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2355,9 +2331,6 @@ try_again: (ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2399,13 +2372,13 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2420,40 +2393,12 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2476,9 +2421,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2531,14 +2473,14 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ulint n_retries = 0; ulint err; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2551,50 +2493,12 @@ os_file_write( ut_ad(buf); ut_ad(n > 0); retry: - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); - } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); + ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped); /* Always do fsync to reduce the probability that when the OS crashes, a database page is only partially physically written to disk. */ @@ -2605,10 +2509,6 @@ retry: } # endif /* UNIV_DO_FLUSH */ -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; os_mutex_exit(os_file_count_mutex); @@ -3012,7 +2912,7 @@ os_aio_array_create( array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); #ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); + array->handles = ut_malloc(n * sizeof(HANDLE)); #endif for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); @@ -3020,13 +2920,14 @@ os_aio_array_create( slot->pos = i; slot->reserved = FALSE; #ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); + slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL); + over = &(slot->control); - over->hEvent = slot->event->handle; + over->hEvent = slot->handle; - *((array->native_events) + i) = over->hEvent; + *((array->handles) + i) = over->hEvent; #endif } @@ -3046,12 +2947,12 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); + CloseHandle(slot->handle); } #endif /* WIN_ASYNC_IO */ #ifdef __WIN__ - ut_free(array->native_events); + ut_free(array->handles); #endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); @@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown( for (i = 0; i < array->n_slots; i++) { - os_event_set((array->slots + i)->event); + SetEvent(array->slots[i].handle); + } } #endif @@ -3396,7 +3298,7 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); @@ -3433,7 +3335,7 @@ os_aio_array_free_slot( } #ifdef WIN_ASYNC_IO - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); } @@ -3793,15 +3695,18 @@ os_aio_windows_handle( n = array->n_slots / array->n_segments; if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); + WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE); i = pos; } else { srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - + segment * n); + i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE); } + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + + os_mutex_enter(array->mutex); slot = os_aio_array_get_nth_slot(array, i + segment * n); diff --git a/storage/innodb_plugin/os/os0sync.c b/storage/innodb_plugin/os/os0sync.c index 60467242e14..3ffef2e3c01 100644 --- a/storage/innodb_plugin/os/os0sync.c +++ b/storage/innodb_plugin/os/os0sync.c @@ -31,6 +31,7 @@ Created 9/6/1995 Heikki Tuuri #ifdef __WIN__ #include +#include #endif #include "ut0mem.h" @@ -71,11 +72,225 @@ UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; +/* The number of microsecnds in a second. */ +static const ulint MICROSECS_IN_A_SECOND = 1000000; + /* Because a mutex is embedded inside an event and there is an event embedded inside a mutex, on free, this generates a recursive call. This version of the free event function doesn't acquire the global lock */ static void os_event_free_internal(os_event_t event); +/* On Windows (Vista and later), load function pointers for condition +variable handling. Those functions are not available in prior versions, +so we have to use them via runtime loading, as long as we support XP. */ +static void os_cond_module_init(void); + +#ifdef __WIN__ +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI* InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static InitializeConditionVariableProc initialize_condition_variable; + +typedef BOOL (WINAPI* SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); +static SleepConditionVariableCSProc sleep_condition_variable; + +typedef VOID (WINAPI* WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeAllConditionVariableProc wake_all_condition_variable; + +typedef VOID (WINAPI* WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeConditionVariableProc wake_condition_variable; +#endif + +/*********************************************************//** +Initialitze condition variable */ +UNIV_INLINE +void +os_cond_init( +/*=========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(initialize_condition_variable != NULL); + initialize_condition_variable(cond); +#else + ut_a(pthread_cond_init(cond, NULL) == 0); +#endif +} + +/*********************************************************//** +Do a timed wait on condition variable. +@return TRUE if timed out, FALSE otherwise */ +UNIV_INLINE +ibool +os_cond_wait_timed( +/*===============*/ + os_cond_t* cond, /*!< in: condition variable. */ + os_fast_mutex_t* mutex, /*!< in: fast mutex */ +#ifndef __WIN__ + const struct timespec* abstime /*!< in: timeout */ +#else + DWORD time_in_ms /*!< in: timeout in + milliseconds*/ +#endif /* !__WIN__ */ +) +{ +#ifdef __WIN__ + BOOL ret; + DWORD err; + + ut_a(sleep_condition_variable != NULL); + + ret = sleep_condition_variable(cond, mutex, time_in_ms); + + if (!ret) { + err = GetLastError(); + /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx, + "Condition variables are subject to spurious wakeups + (those not associated with an explicit wake) and stolen wakeups + (another thread manages to run before the woken thread)." + Check for both types of timeouts. + Conditions are checked by the caller.*/ + if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(TRUE); + } + } + + ut_a(ret); + + return(FALSE); +#else + int ret; + + ret = pthread_cond_timedwait(cond, mutex, abstime); + + switch (ret) { + case 0: + case ETIMEDOUT: + /* We play it safe by checking for EINTR even though + according to the POSIX documentation it can't return EINTR. */ + case EINTR: + break; + + default: + fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: " + "%d: abstime={%lu,%lu}\n", + ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec); + ut_error; + } + + return(ret == ETIMEDOUT); +#endif +} +/*********************************************************//** +Wait on condition variable */ +UNIV_INLINE +void +os_cond_wait( +/*=========*/ + os_cond_t* cond, /*!< in: condition variable. */ + os_fast_mutex_t* mutex) /*!< in: fast mutex */ +{ + ut_a(cond); + ut_a(mutex); + +#ifdef __WIN__ + ut_a(sleep_condition_variable != NULL); + ut_a(sleep_condition_variable(cond, mutex, INFINITE)); +#else + ut_a(pthread_cond_wait(cond, mutex) == 0); +#endif +} + +/*********************************************************//** +Wakes all threads waiting for condition variable */ +UNIV_INLINE +void +os_cond_broadcast( +/*==============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_all_condition_variable != NULL); + wake_all_condition_variable(cond); +#else + ut_a(pthread_cond_broadcast(cond) == 0); +#endif +} + +/*********************************************************//** +Wakes one thread waiting for condition variable */ +UNIV_INLINE +void +os_cond_signal( +/*==========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_condition_variable != NULL); + wake_condition_variable(cond); +#else + ut_a(pthread_cond_signal(cond) == 0); +#endif +} + +/*********************************************************//** +Destroys condition variable */ +UNIV_INLINE +void +os_cond_destroy( +/*============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ +#ifdef __WIN__ + /* Do nothing */ +#else + ut_a(pthread_cond_destroy(cond) == 0); +#endif +} + +/*********************************************************//** +On Windows (Vista and later), load function pointers for condition variable +handling. Those functions are not available in prior versions, so we have to +use them via runtime loading, as long as we support XP. */ +static +void +os_cond_module_init(void) +/*=====================*/ +{ +#ifdef __WIN__ + HMODULE h_dll; + + + h_dll = GetModuleHandle("kernel32"); + + initialize_condition_variable = (InitializeConditionVariableProc) + GetProcAddress(h_dll, "InitializeConditionVariable"); + sleep_condition_variable = (SleepConditionVariableCSProc) + GetProcAddress(h_dll, "SleepConditionVariableCS"); + wake_all_condition_variable = (WakeAllConditionVariableProc) + GetProcAddress(h_dll, "WakeAllConditionVariable"); + wake_condition_variable = (WakeConditionVariableProc) + GetProcAddress(h_dll, "WakeConditionVariable"); + + /* When using native condition variables, check function pointers */ + ut_a(initialize_condition_variable); + ut_a(sleep_condition_variable); + ut_a(wake_all_condition_variable); + ut_a(wake_condition_variable); +#endif +} + /*********************************************************//** Initializes global event and OS 'slow' mutex lists. */ UNIV_INTERN @@ -89,6 +304,9 @@ os_sync_init(void) os_sync_mutex = NULL; os_sync_mutex_inited = FALSE; + /* Now for Windows only */ + os_cond_module_init(); + os_sync_mutex = os_mutex_create(NULL); os_sync_mutex_inited = TRUE; @@ -143,42 +361,46 @@ os_event_create( const char* name) /*!< in: the name of the event, if NULL the event is created without a name */ { -#ifdef __WIN__ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - TRUE, /* Manual reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows event semaphore;" - " Windows error %lu\n", - (ulong) GetLastError()); - } -#else /* Unix */ os_event_t event; - UT_NOT_USED(name); +#ifdef __WIN__ + if(!srv_use_native_conditions) { - event = ut_malloc(sizeof(struct os_event_struct)); + event = ut_malloc(sizeof(struct os_event_struct)); - os_fast_mutex_init(&(event->os_mutex)); + event->handle = CreateEvent(NULL, + TRUE, + FALSE, + (LPCTSTR) name); + if (!event->handle) { + fprintf(stderr, + "InnoDB: Could not create a Windows event" + " semaphore; Windows error %lu\n", + (ulong) GetLastError()); + } + } else /* Windows with condition variables */ - ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); +#endif - event->is_set = FALSE; + { + UT_NOT_USED(name); - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; -#endif /* __WIN__ */ + event = ut_malloc(sizeof(struct os_event_struct)); + + os_fast_mutex_init(&(event->os_mutex)); + + os_cond_init(&(event->cond_var)); + + event->is_set = FALSE; + + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. */ + event->signal_count = 1; + } /* The os_sync_mutex can be NULL because during startup an event can be created [ because it's embedded in the mutex/rwlock ] before @@ -208,10 +430,15 @@ os_event_set( /*=========*/ os_event_t event) /*!< in: event to set */ { -#ifdef __WIN__ ut_a(event); - ut_a(SetEvent(event->handle)); -#else + +#ifdef __WIN__ + if (!srv_use_native_conditions) { + ut_a(SetEvent(event->handle)); + return; + } +#endif + ut_a(event); os_fast_mutex_lock(&(event->os_mutex)); @@ -221,11 +448,10 @@ os_event_set( } else { event->is_set = TRUE; event->signal_count += 1; - ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); + os_cond_broadcast(&(event->cond_var)); } os_fast_mutex_unlock(&(event->os_mutex)); -#endif } /**********************************************************//** @@ -244,12 +470,14 @@ os_event_reset( { ib_int64_t ret = 0; -#ifdef __WIN__ ut_a(event); - ut_a(ResetEvent(event->handle)); -#else - ut_a(event); +#ifdef __WIN__ + if(!srv_use_native_conditions) { + ut_a(ResetEvent(event->handle)); + return(0); + } +#endif os_fast_mutex_lock(&(event->os_mutex)); @@ -261,7 +489,6 @@ os_event_reset( ret = event->signal_count; os_fast_mutex_unlock(&(event->os_mutex)); -#endif return(ret); } @@ -274,19 +501,21 @@ os_event_free_internal( os_event_t event) /*!< in: event to free */ { #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions) { + ut_a(event); + ut_a(CloseHandle(event->handle)); + } else #endif - /* Remove from the list of events */ + { + ut_a(event); + /* This is to avoid freeing the mutex twice */ + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + + /* Remove from the list of events */ UT_LIST_REMOVE(os_event_list, os_event_list, event); os_event_count--; @@ -303,18 +532,19 @@ os_event_free( os_event_t event) /*!< in: event to free */ { + ut_a(event); #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions){ + ut_a(CloseHandle(event->handle)); + } else /*Windows with condition variables */ #endif - /* Remove from the list of events */ + { + os_fast_mutex_free(&(event->os_mutex)); + os_cond_destroy(&(event->cond_var)); + } + + /* Remove from the list of events */ os_mutex_enter(os_sync_mutex); UT_LIST_REMOVE(os_event_list, os_event_list, event); @@ -327,10 +557,7 @@ os_event_free( } /**********************************************************//** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). +Waits for an event object until it is in the signaled state. Typically, if the event has been signalled after the os_event_reset() we'll return immediately because event->is_set == TRUE. @@ -355,23 +582,27 @@ os_event_wait_low( returned by previous call of os_event_reset(). */ { -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - UT_NOT_USED(reset_sig_count); - - /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } -#else ib_int64_t old_signal_count; +#ifdef __WIN__ + if(!srv_use_native_conditions) { + DWORD err; + + ut_a(event); + + UT_NOT_USED(reset_sig_count); + + /* Specify an infinite wait */ + err = WaitForSingleObject(event->handle, INFINITE); + + ut_a(err == WAIT_OBJECT_0); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + return; + } +#endif + os_fast_mutex_lock(&(event->os_mutex)); @@ -396,13 +627,12 @@ os_event_wait_low( return; } - pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); + os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we have to check if the event really has been signaled after we came here to wait */ } -#endif } /**********************************************************//** @@ -418,27 +648,29 @@ os_event_wait_time( OS_SYNC_INFINITE_TIME */ { #ifdef __WIN__ - DWORD err; + if(!srv_use_native_conditions) { + DWORD err; - ut_a(event); + ut_a(event); - if (time != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, (DWORD) time / 1000); - } else { - err = WaitForSingleObject(event->handle, INFINITE); - } + if (time != OS_SYNC_INFINITE_TIME) { + err = WaitForSingleObject(event->handle, (DWORD) time / 1000); + } else { + err = WaitForSingleObject(event->handle, INFINITE); + } - if (err == WAIT_OBJECT_0) { + if (err == WAIT_OBJECT_0) { - return(0); - } else if (err == WAIT_TIMEOUT) { + return(0); + } else if (err == WAIT_TIMEOUT) { - return(OS_SYNC_TIME_EXCEEDED); - } else { - ut_error; - return(1000000); /* dummy value to eliminate compiler warn. */ - } -#else + return(OS_SYNC_TIME_EXCEEDED); + } else { + ut_error; + return(1000000); /* dummy value to eliminate compiler warn. */ + } + } +#endif UT_NOT_USED(time); /* In Posix this is just an ordinary, infinite wait */ @@ -446,43 +678,8 @@ os_event_wait_time( os_event_wait(event); return(0); -#endif } -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array) - /*!< in: pointer to an array of event - handles */ -{ - DWORD index; - - ut_a(native_event_array); - ut_a(n > 0); - - index = WaitForMultipleObjects((DWORD) n, native_event_array, - FALSE, /* Wait for any 1 event */ - INFINITE); /* Infinite wait time - limit */ - ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */ - ut_a(index < WAIT_OBJECT_0 + n); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - - return(index - WAIT_OBJECT_0); -} -#endif /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the @@ -495,24 +692,12 @@ os_mutex_create( const char* name) /*!< in: the name of the mutex, if NULL the mutex is created without a name */ { -#ifdef __WIN__ - HANDLE mutex; - os_mutex_t mutex_str; - - mutex = CreateMutex(NULL, /* No security attributes */ - FALSE, /* Initial state: no owner */ - (LPCTSTR) name); - ut_a(mutex); -#else os_fast_mutex_t* mutex; os_mutex_t mutex_str; - UT_NOT_USED(name); - mutex = ut_malloc(sizeof(os_fast_mutex_t)); os_fast_mutex_init(mutex); -#endif mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str->handle = mutex; @@ -543,25 +728,11 @@ os_mutex_enter( /*===========*/ os_mutex_t mutex) /*!< in: mutex to acquire */ { -#ifdef __WIN__ - DWORD err; - - ut_a(mutex); - - /* Specify infinite time limit for waiting */ - err = WaitForSingleObject(mutex->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - (mutex->count)++; - ut_a(mutex->count == 1); -#else os_fast_mutex_lock(mutex->handle); (mutex->count)++; ut_a(mutex->count == 1); -#endif } /**********************************************************//** @@ -577,11 +748,7 @@ os_mutex_exit( ut_a(mutex->count == 1); (mutex->count)--; -#ifdef __WIN__ - ut_a(ReleaseMutex(mutex->handle)); -#else os_fast_mutex_unlock(mutex->handle); -#endif } /**********************************************************//** @@ -610,15 +777,9 @@ os_mutex_free( os_mutex_exit(os_sync_mutex); } -#ifdef __WIN__ - ut_a(CloseHandle(mutex->handle)); - - ut_free(mutex); -#else os_fast_mutex_free(mutex->handle); ut_free(mutex->handle); ut_free(mutex); -#endif } /*********************************************************//** diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index b1fc1ac67fd..43d57c63a65 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; /** Place locks to records only i.e. do not use next-key locking except on duplicate key checking and foreign key checking */ UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +#ifdef __WIN__ +/* Windows native condition variables. We use runtime loading / function +pointers, because they are not available on Windows Server 2003 and +Windows XP/2000. + +We use condition for events on Windows if possible, even if os_event +resembles Windows kernel event object well API-wise. The reason is +performance, kernel objects are heavyweights and WaitForSingleObject() is a +performance killer causing calling thread to context switch. Besides, Innodb +is preallocating large number (often millions) of os_events. With kernel event +objects it takes a big chunk out of non-paged pool, which is better suited +for tasks like IO than for storing idle event objects. */ +UNIV_INTERN ibool srv_use_native_conditions = FALSE; +#endif /* __WIN__ */ UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN char** srv_data_file_names = NULL; diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index f8b5049ca65..b5892c59cfb 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void) case OS_WIN95: case OS_WIN31: case OS_WINNT: - /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, - and NT use simulated aio. In NT Windows provides async i/o, - but when run in conjunction with InnoDB Hot Backup, it seemed - to corrupt the data files. */ + srv_use_native_conditions = FALSE; + break; - os_aio_use_native_aio = FALSE; - break; - default: - /* On Win 2000 and XP use async i/o */ + case OS_WIN2000: + case OS_WINXP: + /* On 2000 and XP, async IO is available, but no condition variables. */ os_aio_use_native_aio = TRUE; - break; - } + srv_use_native_conditions = FALSE; + break; + + default: + /* On Win 2000 and XP use async i/o */ + /* Vista and later have both async IO and condition variables */ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = TRUE; + break; + } #endif + if (srv_file_flush_method_str == NULL) { /* These are the default options */ diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 509f7f0fe73..6e16c4ced32 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -15,20 +15,10 @@ # This is the CMakeLists for InnoDB Plugin - - -# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin -# CMakeLists.txt still needs to work with previous versions of MySQL. -IF (MYSQL_VERSION_ID GREATER "50137") - INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake") -ENDIF (MYSQL_VERSION_ID GREATER "50137") - IF (CMAKE_SIZEOF_VOID_P MATCHES 8) SET(WIN64 TRUE) ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) -ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER) - # Include directories under xtradb INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include ${CMAKE_SOURCE_DIR}/storage/xtradb/handler) @@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) -# Windows atomics do not perform well. Disable Windows atomics by default. -# See bug#52102 for details. -#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION) -ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) + MYSQL_STORAGE_ENGINE(XTRADB) diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 732e930517b..ae656d92bb7 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -183,6 +183,10 @@ log. */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ +#define OS_WINXP 5 /*!< Microsoft Windows XP */ +#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */ +#define OS_WIN7 7 /*!< Microsoft Windows 7 */ + extern ulint os_n_file_reads; extern ulint os_n_file_writes; diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 7366e2c3402..002abebcb0b 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" -#ifdef __WIN__ - +#ifdef _WIN32 +/** Native event (slow)*/ +typedef HANDLE os_native_event_t; /** Native mutex */ -#define os_fast_mutex_t CRITICAL_SECTION - -/** Native event */ -typedef HANDLE os_native_event_t; - -/** Operating system event */ -typedef struct os_event_struct os_event_struct_t; -/** Operating system event handle */ -typedef os_event_struct_t* os_event_t; - -/** An asynchronous signal sent between threads */ -struct os_event_struct { - os_native_event_t handle; - /*!< Windows event */ - UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /*!< list of all created events */ -}; +typedef CRITICAL_SECTION os_fast_mutex_t; +/** Native condition variable */ +typedef CONDITION_VARIABLE os_cond_t; #else /** Native mutex */ typedef pthread_mutex_t os_fast_mutex_t; +/** Native condition variable */ +typedef pthread_cond_t os_cond_t; +#endif /** Operating system event */ typedef struct os_event_struct os_event_struct_t; @@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; /** An asynchronous signal sent between threads */ struct os_event_struct { +#ifdef _WIN32 + HANDLE handle; /*!< kernel event object, slow, used on older Windows */ +#endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ ibool is_set; /*!< this is TRUE when the event is @@ -76,12 +69,14 @@ struct os_event_struct { this event */ ib_int64_t signal_count; /*!< this is incremented each time the event becomes signaled */ - pthread_cond_t cond_var; /*!< condition variable is used in + os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ UT_LIST_NODE_T(os_event_struct_t) os_event_list; /*!< list of all created events */ }; -#endif + + + /** Operating system mutex */ typedef struct os_mutex_struct os_mutex_str_t; @@ -186,33 +181,23 @@ os_event_wait_low( os_event_reset(). */ #define os_event_wait(event) os_event_wait_low(event, 0) - +#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0) /**********************************************************//** Waits for an event object until it is in the signaled state or -a timeout is exceeded. +a timeout is exceeded. In Unix the timeout is always infinite. @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ UNIV_INTERN ulint -os_event_wait_time( -/*===============*/ - os_event_t event, /*!< in: event to wait */ - ulint wtime); /*!< in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( +os_event_wait_time_low( /*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array); - /*!< in: pointer to an array of event - handles */ -#endif + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count); /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ + /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. @@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) -#elif defined(HAVE_WINDOWS_ATOMICS) +#elif defined(_WIN32) #define HAVE_ATOMIC_BUILTINS diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic index 1f3ce38fa65..2c6c1dbe629 100644 --- a/storage/xtradb/include/os0sync.ic +++ b/storage/xtradb/include/os0sync.ic @@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri #endif /**********************************************************//** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! +Acquires ownership of a fast mutex. @return 0 if success, != 0 if was reserved by another thread */ UNIV_INLINE ulint @@ -38,9 +37,9 @@ os_fast_mutex_trylock( os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ { #ifdef __WIN__ - EnterCriticalSection(fast_mutex); - - return(0); + if (TryEnterCriticalSection(fast_mutex)) + return 0; + return(1); #else /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock so that it returns 0 on success. In the operating system diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d4329d16a62..e47e10217ec 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup; on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; #endif /* !UNIV_HOTBACKUP */ - +#ifdef __WIN__ +extern ibool srv_use_native_conditions; +#endif extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index f2ff83101ab..6aaab1cc7d7 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri extern my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS +#ifdef _WIN32 typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates on LONG variable */ #else diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 5b8e656d8b2..5831b0bd0df 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -149,7 +149,7 @@ struct os_aio_slot_struct{ which pending aio operation was completed */ #ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the + HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ OVERLAPPED control; /*!< Windows control block for the aio request */ @@ -183,7 +183,7 @@ struct os_aio_array_struct{ aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ #ifdef __WIN__ - os_native_event_t* native_events; + HANDLE* handles; /*!< Pointer to an array of OS native event handles where we copied the handles from slots, in the same @@ -270,10 +270,16 @@ os_get_os_version(void) } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { return(OS_WIN95); } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - if (os_info.dwMajorVersion <= 4) { - return(OS_WINNT); - } else { - return(OS_WIN2000); + switch(os_info.dwMajorVersion){ + case 3: + case 4: + return OS_WINNT; + case 5: + return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP; + case 6: + return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7; + default: + return OS_WIN7; } } else { ut_error; @@ -2350,13 +2356,12 @@ _os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2371,40 +2376,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2433,9 +2409,7 @@ try_again: (ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif + retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2477,13 +2451,13 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2498,40 +2472,11 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = (DWORD) offset; - high = (DWORD) offset_high; - os_mutex_enter(os_file_count_mutex); os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped); os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2554,9 +2499,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2609,14 +2551,13 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; - DWORD low; - DWORD high; ulint n_retries = 0; ulint err; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; + + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2629,50 +2570,12 @@ os_file_write( ut_ad(buf); ut_ad(n > 0); retry: - low = (DWORD) offset; - high = (DWORD) offset_high; os_mutex_enter(os_file_count_mutex); os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); - } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); + ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped); /* Always do fsync to reduce the probability that when the OS crashes, a database page is only partially physically written to disk. */ @@ -2683,10 +2586,6 @@ retry: } # endif /* UNIV_DO_FLUSH */ -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; os_mutex_exit(os_file_count_mutex); @@ -3090,7 +2989,7 @@ os_aio_array_create( array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); #ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); + array->handles = ut_malloc(n * sizeof(HANDLE)); #endif for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); @@ -3098,13 +2997,14 @@ os_aio_array_create( slot->pos = i; slot->reserved = FALSE; #ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); + slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL); + over = &(slot->control); - over->hEvent = slot->event->handle; + over->hEvent = slot->handle; - *((array->native_events) + i) = over->hEvent; + *((array->handles) + i) = over->hEvent; #endif } @@ -3124,12 +3024,12 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); + CloseHandle(slot->handle); } #endif /* WIN_ASYNC_IO */ #ifdef __WIN__ - ut_free(array->native_events); + ut_free(array->handles); #endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); @@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown( for (i = 0; i < array->n_slots; i++) { - os_event_set((array->slots + i)->event); + SetEvent(array->slots[i].handle); + } } #endif @@ -3480,7 +3381,7 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); @@ -3518,7 +3419,7 @@ os_aio_array_free_slot( } #ifdef WIN_ASYNC_IO - os_event_reset(slot->event); + ResetEvent(slot->handle); #endif os_mutex_exit(array->mutex); } @@ -3906,15 +3807,18 @@ os_aio_windows_handle( n = array->n_slots; if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); + WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE); i = pos; } else { srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - ); + i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE); } + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); + } + + os_mutex_enter(array->mutex); slot = os_aio_array_get_nth_slot(array, i); diff --git a/storage/xtradb/os/os0sync.c b/storage/xtradb/os/os0sync.c index dba997927cb..75bd6d44c2e 100644 --- a/storage/xtradb/os/os0sync.c +++ b/storage/xtradb/os/os0sync.c @@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0start.h" +#include "srv0srv.h" /* Type definition for an operating system mutex struct */ struct os_mutex_struct{ @@ -74,11 +75,225 @@ UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; +/* The number of microsecnds in a second. */ +static const ulint MICROSECS_IN_A_SECOND = 1000000; + /* Because a mutex is embedded inside an event and there is an event embedded inside a mutex, on free, this generates a recursive call. This version of the free event function doesn't acquire the global lock */ static void os_event_free_internal(os_event_t event); +/* On Windows (Vista and later), load function pointers for condition +variable handling. Those functions are not available in prior versions, +so we have to use them via runtime loading, as long as we support XP. */ +static void os_cond_module_init(void); + +#ifdef __WIN__ +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI* InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static InitializeConditionVariableProc initialize_condition_variable; + +typedef BOOL (WINAPI* SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); +static SleepConditionVariableCSProc sleep_condition_variable; + +typedef VOID (WINAPI* WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeAllConditionVariableProc wake_all_condition_variable; + +typedef VOID (WINAPI* WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); +static WakeConditionVariableProc wake_condition_variable; +#endif + +/*********************************************************//** +Initialitze condition variable */ +UNIV_INLINE +void +os_cond_init( +/*=========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(initialize_condition_variable != NULL); + initialize_condition_variable(cond); +#else + ut_a(pthread_cond_init(cond, NULL) == 0); +#endif +} + +/*********************************************************//** +Do a timed wait on condition variable. +@return TRUE if timed out, FALSE otherwise */ +UNIV_INLINE +ibool +os_cond_wait_timed( +/*===============*/ + os_cond_t* cond, /*!< in: condition variable. */ + os_fast_mutex_t* mutex, /*!< in: fast mutex */ +#ifndef __WIN__ + const struct timespec* abstime /*!< in: timeout */ +#else + DWORD time_in_ms /*!< in: timeout in + milliseconds*/ +#endif /* !__WIN__ */ +) +{ +#ifdef __WIN__ + BOOL ret; + DWORD err; + + ut_a(sleep_condition_variable != NULL); + + ret = sleep_condition_variable(cond, mutex, time_in_ms); + + if (!ret) { + err = GetLastError(); + /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx, + "Condition variables are subject to spurious wakeups + (those not associated with an explicit wake) and stolen wakeups + (another thread manages to run before the woken thread)." + Check for both types of timeouts. + Conditions are checked by the caller.*/ + if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(TRUE); + } + } + + ut_a(ret); + + return(FALSE); +#else + int ret; + + ret = pthread_cond_timedwait(cond, mutex, abstime); + + switch (ret) { + case 0: + case ETIMEDOUT: + /* We play it safe by checking for EINTR even though + according to the POSIX documentation it can't return EINTR. */ + case EINTR: + break; + + default: + fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: " + "%d: abstime={%lu,%lu}\n", + ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec); + ut_error; + } + + return(ret == ETIMEDOUT); +#endif +} +/*********************************************************//** +Wait on condition variable */ +UNIV_INLINE +void +os_cond_wait( +/*=========*/ + os_cond_t* cond, /*!< in: condition variable. */ + os_fast_mutex_t* mutex) /*!< in: fast mutex */ +{ + ut_a(cond); + ut_a(mutex); + +#ifdef __WIN__ + ut_a(sleep_condition_variable != NULL); + ut_a(sleep_condition_variable(cond, mutex, INFINITE)); +#else + ut_a(pthread_cond_wait(cond, mutex) == 0); +#endif +} + +/*********************************************************//** +Wakes all threads waiting for condition variable */ +UNIV_INLINE +void +os_cond_broadcast( +/*==============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_all_condition_variable != NULL); + wake_all_condition_variable(cond); +#else + ut_a(pthread_cond_broadcast(cond) == 0); +#endif +} + +/*********************************************************//** +Wakes one thread waiting for condition variable */ +UNIV_INLINE +void +os_cond_signal( +/*==========*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ + ut_a(cond); + +#ifdef __WIN__ + ut_a(wake_condition_variable != NULL); + wake_condition_variable(cond); +#else + ut_a(pthread_cond_signal(cond) == 0); +#endif +} + +/*********************************************************//** +Destroys condition variable */ +UNIV_INLINE +void +os_cond_destroy( +/*============*/ + os_cond_t* cond) /*!< in: condition variable. */ +{ +#ifdef __WIN__ + /* Do nothing */ +#else + ut_a(pthread_cond_destroy(cond) == 0); +#endif +} + +/*********************************************************//** +On Windows (Vista and later), load function pointers for condition variable +handling. Those functions are not available in prior versions, so we have to +use them via runtime loading, as long as we support XP. */ +static +void +os_cond_module_init(void) +/*=====================*/ +{ +#ifdef __WIN__ + HMODULE h_dll; + + + h_dll = GetModuleHandle("kernel32"); + + initialize_condition_variable = (InitializeConditionVariableProc) + GetProcAddress(h_dll, "InitializeConditionVariable"); + sleep_condition_variable = (SleepConditionVariableCSProc) + GetProcAddress(h_dll, "SleepConditionVariableCS"); + wake_all_condition_variable = (WakeAllConditionVariableProc) + GetProcAddress(h_dll, "WakeAllConditionVariable"); + wake_condition_variable = (WakeConditionVariableProc) + GetProcAddress(h_dll, "WakeConditionVariable"); + + /* When using native condition variables, check function pointers */ + ut_a(initialize_condition_variable); + ut_a(sleep_condition_variable); + ut_a(wake_all_condition_variable); + ut_a(wake_condition_variable); +#endif +} + /*********************************************************//** Initializes global event and OS 'slow' mutex lists. */ UNIV_INTERN @@ -92,6 +307,9 @@ os_sync_init(void) os_sync_mutex = NULL; os_sync_mutex_inited = FALSE; + /* Now for Windows only */ + os_cond_module_init(); + os_sync_mutex = os_mutex_create(NULL); os_sync_mutex_inited = TRUE; @@ -146,42 +364,45 @@ os_event_create( const char* name) /*!< in: the name of the event, if NULL the event is created without a name */ { -#ifdef __WIN__ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - TRUE, /* Manual reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows event semaphore;" - " Windows error %lu\n", - (ulong) GetLastError()); - } -#else /* Unix */ os_event_t event; - UT_NOT_USED(name); +#ifdef __WIN__ + if(!srv_use_native_conditions) { - event = ut_malloc(sizeof(struct os_event_struct)); + event = ut_malloc(sizeof(struct os_event_struct)); - os_fast_mutex_init(&(event->os_mutex)); + event->handle = CreateEvent(NULL, + TRUE, + FALSE, + (LPCTSTR) name); + if (!event->handle) { + fprintf(stderr, + "InnoDB: Could not create a Windows event" + " semaphore; Windows error %lu\n", + (ulong) GetLastError()); + } + } else /* Windows with condition variables */ +#endif - ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); + { + UT_NOT_USED(name); - event->is_set = FALSE; + event = ut_malloc(sizeof(struct os_event_struct)); - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; -#endif /* __WIN__ */ + os_fast_mutex_init(&(event->os_mutex)); + + os_cond_init(&(event->cond_var)); + + event->is_set = FALSE; + + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. */ + event->signal_count = 1; + } /* The os_sync_mutex can be NULL because during startup an event can be created [ because it's embedded in the mutex/rwlock ] before @@ -211,10 +432,15 @@ os_event_set( /*=========*/ os_event_t event) /*!< in: event to set */ { -#ifdef __WIN__ ut_a(event); - ut_a(SetEvent(event->handle)); -#else + +#ifdef __WIN__ + if (!srv_use_native_conditions) { + ut_a(SetEvent(event->handle)); + return; + } +#endif + ut_a(event); os_fast_mutex_lock(&(event->os_mutex)); @@ -224,11 +450,10 @@ os_event_set( } else { event->is_set = TRUE; event->signal_count += 1; - ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); + os_cond_broadcast(&(event->cond_var)); } os_fast_mutex_unlock(&(event->os_mutex)); -#endif } /**********************************************************//** @@ -247,12 +472,14 @@ os_event_reset( { ib_int64_t ret = 0; -#ifdef __WIN__ ut_a(event); - ut_a(ResetEvent(event->handle)); -#else - ut_a(event); +#ifdef __WIN__ + if(!srv_use_native_conditions) { + ut_a(ResetEvent(event->handle)); + return(0); + } +#endif os_fast_mutex_lock(&(event->os_mutex)); @@ -264,7 +491,6 @@ os_event_reset( ret = event->signal_count; os_fast_mutex_unlock(&(event->os_mutex)); -#endif return(ret); } @@ -277,17 +503,20 @@ os_event_free_internal( os_event_t event) /*!< in: event to free */ { #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions) { + ut_a(event); + ut_a(CloseHandle(event->handle)); + } else #endif + { + ut_a(event); + + /* This is to avoid freeing the mutex twice */ + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + /* Remove from the list of events */ UT_LIST_REMOVE(os_event_list, os_event_list, event); @@ -306,16 +535,18 @@ os_event_free( os_event_t event) /*!< in: event to free */ { + ut_a(event); #ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); + if(!srv_use_native_conditions){ + ut_a(CloseHandle(event->handle)); + } else /*Windows with condition variables */ #endif + { + os_fast_mutex_free(&(event->os_mutex)); + + os_cond_destroy(&(event->cond_var)); + } + /* Remove from the list of events */ os_mutex_enter(os_sync_mutex); @@ -358,24 +589,25 @@ os_event_wait_low( returned by previous call of os_event_reset(). */ { -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - UT_NOT_USED(reset_sig_count); - - /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } -#else + ib_int64_t old_signal_count; +#ifdef __WIN__ + if(!srv_use_native_conditions) { + DWORD err; + + ut_a(event); + + UT_NOT_USED(reset_sig_count); + + /* Specify an infinite wait */ + err = WaitForSingleObject(event->handle, INFINITE); + + ut_a(err == WAIT_OBJECT_0); + return; + } +#endif + os_fast_mutex_lock(&(event->os_mutex)); if (reset_sig_count) { @@ -399,13 +631,12 @@ os_event_wait_low( return; } - pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); + os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we have to check if the event really has been signaled after we came here to wait */ } -#endif } /**********************************************************//** @@ -414,112 +645,112 @@ a timeout is exceeded. @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ UNIV_INTERN ulint -os_event_wait_time( -/*===============*/ - os_event_t event, /*!< in: event to wait */ - ulint wtime) /*!< in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ +os_event_wait_time_low( +/*===================*/ + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count) /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ + { + ibool timed_out = FALSE; + #ifdef __WIN__ - DWORD err; + DWORD time_in_ms; - ut_a(event); + if (!srv_use_native_conditions) { + DWORD err; - if (wtime != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, (DWORD) wtime / 1000); - } else { - err = WaitForSingleObject(event->handle, INFINITE); - } + ut_a(event); - if (err == WAIT_OBJECT_0) { + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + time_in_ms = time_in_usec / 1000; + err = WaitForSingleObject(event->handle, time_in_ms); + } else { + err = WaitForSingleObject(event->handle, INFINITE); + } - return(0); - } else if (err == WAIT_TIMEOUT) { + if (err == WAIT_OBJECT_0) { + return(0); + } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { + return(OS_SYNC_TIME_EXCEEDED); + } - return(OS_SYNC_TIME_EXCEEDED); - } else { ut_error; - return(1000000); /* dummy value to eliminate compiler warn. */ - } -#else - int err; - int ret = 0; - ulint tmp; - ib_int64_t old_count; - struct timeval tv_start; - struct timespec timeout; + /* Dummy value to eliminate compiler warning. */ + return(42); + } else { + ut_a(sleep_condition_variable != NULL); - if (wtime == OS_SYNC_INFINITE_TIME) { - os_event_wait(event); - return 0; - } - - /* Compute the absolute point in time at which to time out. */ - gettimeofday(&tv_start, NULL); - tmp = tv_start.tv_usec + wtime; - timeout.tv_sec = tv_start.tv_sec + (tmp / 1000000); - timeout.tv_nsec = (tmp % 1000000) * 1000; - - os_fast_mutex_lock(&(event->os_mutex)); - old_count = event->signal_count; - - for (;;) { - if (event->is_set == TRUE || event->signal_count != old_count) - break; - - err = pthread_cond_timedwait(&(event->cond_var), - &(event->os_mutex), &timeout); - if (err == ETIMEDOUT) { - ret = OS_SYNC_TIME_EXCEEDED; - break; + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + time_in_ms = time_in_usec / 1000; + } else { + time_in_ms = INFINITE; } } +#else + struct timespec abstime; - os_fast_mutex_unlock(&(event->os_mutex)); + if (time_in_usec != OS_SYNC_INFINITE_TIME) { + struct timeval tv; + int ret; + ulint sec; + ulint usec; - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + ret = ut_usectime(&sec, &usec); + ut_a(ret == 0); - os_thread_exit(NULL); + tv.tv_sec = sec; + tv.tv_usec = usec; + + tv.tv_usec += time_in_usec; + + if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) { + tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND; + tv.tv_usec %= MICROSECS_IN_A_SECOND; + } + + abstime.tv_sec = tv.tv_sec; + abstime.tv_nsec = tv.tv_usec * 1000; + } else { + abstime.tv_nsec = 999999999; + abstime.tv_sec = (time_t) ULINT_MAX; } - return ret; -#endif -} + ut_a(abstime.tv_nsec <= 999999999); -#ifdef __WIN__ -/**********************************************************//** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. -@return index of the event which was signaled */ -UNIV_INTERN -ulint -os_event_wait_multiple( -/*===================*/ - ulint n, /*!< in: number of events in the - array */ - os_native_event_t* native_event_array) - /*!< in: pointer to an array of event - handles */ -{ - DWORD index; +#endif /* __WIN__ */ - ut_a(native_event_array); - ut_a(n > 0); + os_fast_mutex_lock(&event->os_mutex); - index = WaitForMultipleObjects((DWORD) n, native_event_array, - FALSE, /* Wait for any 1 event */ - INFINITE); /* Infinite wait time - limit */ - ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */ - ut_a(index < WAIT_OBJECT_0 + n); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); + if (!reset_sig_count) { + reset_sig_count = event->signal_count; } - return(index - WAIT_OBJECT_0); + do { + if (event->is_set || event->signal_count != reset_sig_count) { + + break; + } + + timed_out = os_cond_wait_timed( + &event->cond_var, &event->os_mutex, +#ifndef __WIN__ + &abstime +#else + time_in_ms +#endif /* !__WIN__ */ + ); + + } while (!timed_out); + + os_fast_mutex_unlock(&event->os_mutex); + + return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); } -#endif /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the @@ -532,15 +763,6 @@ os_mutex_create( const char* name) /*!< in: the name of the mutex, if NULL the mutex is created without a name */ { -#ifdef __WIN__ - HANDLE mutex; - os_mutex_t mutex_str; - - mutex = CreateMutex(NULL, /* No security attributes */ - FALSE, /* Initial state: no owner */ - (LPCTSTR) name); - ut_a(mutex); -#else os_fast_mutex_t* mutex; os_mutex_t mutex_str; @@ -549,7 +771,6 @@ os_mutex_create( mutex = ut_malloc(sizeof(os_fast_mutex_t)); os_fast_mutex_init(mutex); -#endif mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str->handle = mutex; @@ -580,25 +801,11 @@ os_mutex_enter( /*===========*/ os_mutex_t mutex) /*!< in: mutex to acquire */ { -#ifdef __WIN__ - DWORD err; - - ut_a(mutex); - - /* Specify infinite time limit for waiting */ - err = WaitForSingleObject(mutex->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - (mutex->count)++; - ut_a(mutex->count == 1); -#else os_fast_mutex_lock(mutex->handle); (mutex->count)++; ut_a(mutex->count == 1); -#endif } /**********************************************************//** @@ -614,11 +821,7 @@ os_mutex_exit( ut_a(mutex->count == 1); (mutex->count)--; -#ifdef __WIN__ - ut_a(ReleaseMutex(mutex->handle)); -#else os_fast_mutex_unlock(mutex->handle); -#endif } /**********************************************************//** @@ -647,15 +850,9 @@ os_mutex_free( os_mutex_exit(os_sync_mutex); } -#ifdef __WIN__ - ut_a(CloseHandle(mutex->handle)); - - ut_free(mutex); -#else os_fast_mutex_free(mutex->handle); ut_free(mutex->handle); ut_free(mutex); -#endif } /*********************************************************//** diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index f39d1b8a758..86a328e77c0 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; /** Place locks to records only i.e. do not use next-key locking except on duplicate key checking and foreign key checking */ UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +#ifdef __WIN__ +/* Windows native condition variables. We use runtime loading / function +pointers, because they are not available on Windows Server 2003 and +Windows XP/2000. + +We use condition for events on Windows if possible, even if os_event +resembles Windows kernel event object well API-wise. The reason is +performance, kernel objects are heavyweights and WaitForSingleObject() is a +performance killer causing calling thread to context switch. Besides, Innodb +is preallocating large number (often millions) of os_events. With kernel event +objects it takes a big chunk out of non-paged pool, which is better suited +for tasks like IO than for storing idle event objects. */ +UNIV_INTERN ibool srv_use_native_conditions = FALSE; +#endif /* __WIN__ */ UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN char** srv_data_file_names = NULL; diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index cef045d72e1..e2ff49ce390 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void) case OS_WIN95: case OS_WIN31: case OS_WINNT: - /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, - and NT use simulated aio. In NT Windows provides async i/o, - but when run in conjunction with InnoDB Hot Backup, it seemed - to corrupt the data files. */ + srv_use_native_conditions = FALSE; + break; - os_aio_use_native_aio = FALSE; - break; - default: + case OS_WIN2000: + case OS_WINXP: + /* On 2000 and XP, async IO is available, but no condition variables. */ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = FALSE; + break; + + default: /* On Win 2000 and XP use async i/o */ - //os_aio_use_native_aio = TRUE; - os_aio_use_native_aio = FALSE; - fprintf(stderr, - "InnoDB: Windows native async i/o is disabled as default.\n" - "InnoDB: It is not applicable for the current" - " multi io threads implementation.\n"); - break; - } + /* Vista and later have both async IO and condition variables */ + os_aio_use_native_aio = TRUE; + srv_use_native_conditions = TRUE; + break; + } #endif if (srv_file_flush_method_str == NULL) { /* These are the default options */ @@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void) srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; +#ifdef __WIN__ + srv_n_read_io_threads = srv_n_write_io_threads = 1; +#endif + #ifndef __WIN__ } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { srv_unix_file_flush_method = SRV_UNIX_FSYNC; @@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, - "async_unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - os_aio_use_native_aio = TRUE; - srv_n_read_io_threads = srv_n_write_io_threads = 1; - fprintf(stderr, - "InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n" - "InnoDB: Windows native async i/o is enabled.\n" - "InnoDB: And io threads are restricted.\n"); + } #endif } else { fprintf(stderr, From 69ffc06610136c45d79f5e430373ee9ce4c78cb5 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sat, 11 Jun 2011 14:28:15 +0300 Subject: [PATCH 009/359] Fixes BUG#60976 "Crash, valgrind warning and memory leak with partitioned archive tables" Noted that there was no memory leak, just a lot of used partitioned tables. Fixed old bug: 'show status' now shows memory usage when compiled with safemalloc. Added option --flush to mysqlcheck.c to run a 'flush tables' between each check to keep down memory usage. Changed '--safemalloc' options to mysqld so that one can use --safemalloc and --skip-safemalloc. Now skip-safemalloc is default (ie, we only do checking of memory overrun during free()) to speed up tests. client/client_priv.h: Added OPT_FLUSH_TABLES client/mysqlcheck.c: Added option --flush to mysqlcheck.c to run a 'flush tables' between each check to keep down memory usage. mysql-test/mysql-test-run.pl: Always run tests with --loose-skip-safemysqld for higher speed sql/mysqld.cc: Changed '--safemalloc' options so that one can use --safemalloc and --skip-safemalloc. Now skip-safemalloc is default (ie, we only do checking of memory overrun during free()) to speed up tests sql/sql_parse.cc: Fixed old bug: 'show status' now shows memory usage when compiled with safemalloc. storage/archive/archive_reader.c: Changed all malloc() calls to use my_malloc()/my_free() Added checks of malloc() calls. storage/archive/ha_archive.cc: Detect failure if init_archive_reader() and return errno. This fixed assert crash in my_seek(). Changed all malloc() calls to use my_malloc()/my_free() --- client/client_priv.h | 1 + client/mysqlcheck.c | 30 +++++++++++++++++++++++++----- mysql-test/mysql-test-run.pl | 3 +-- sql/ha_partition.cc | 2 +- sql/mysqld.cc | 23 ++++++++++------------- sql/sql_parse.cc | 2 +- storage/archive/archive_reader.c | 20 ++++++++++++-------- storage/archive/ha_archive.cc | 22 ++++++++++++++-------- 8 files changed, 65 insertions(+), 38 deletions(-) diff --git a/client/client_priv.h b/client/client_priv.h index 28edba5f8b2..fe9081c4b91 100644 --- a/client/client_priv.h +++ b/client/client_priv.h @@ -60,6 +60,7 @@ enum options_client OPT_OPEN_FILES_LIMIT, OPT_SET_CHARSET, OPT_SERVER_ARG, OPT_POSITION, OPT_STOP_POSITION, OPT_START_DATETIME, OPT_STOP_DATETIME, OPT_SIGINT_IGNORE, OPT_HEXBLOB, OPT_ORDER_BY_PRIMARY, OPT_COUNT, + OPT_FLUSH_TABLES, #ifdef HAVE_NDBCLUSTER_DB OPT_NDBCLUSTER, OPT_NDB_CONNECTSTRING, #endif diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c index 14e02bbb328..22f4b70ed81 100644 --- a/client/mysqlcheck.c +++ b/client/mysqlcheck.c @@ -16,7 +16,7 @@ /* By Jani Tolonen, 2001-04-20, MySQL Development Team */ -#define CHECK_VERSION "2.6.0" +#define CHECK_VERSION "2.7.0" #include "client_priv.h" #include @@ -35,8 +35,8 @@ static my_bool opt_alldbs = 0, opt_check_only_changed = 0, opt_extended = 0, opt_medium_check = 0, opt_quick = 0, opt_all_in_1 = 0, opt_silent = 0, opt_auto_repair = 0, ignore_errors = 0, tty_password= 0, opt_frm= 0, debug_info_flag= 0, debug_check_flag= 0, - opt_fix_table_names= 0, opt_fix_db_names= 0, opt_upgrade= 0, - opt_write_binlog= 1; + opt_fix_table_names= 0, opt_fix_db_names= 0, opt_upgrade= 0; +static my_bool opt_write_binlog= 1, opt_flush_tables= 0; static uint verbose = 0, opt_mysql_port=0; static int my_end_arg; static char * opt_mysql_unix_port = 0; @@ -121,6 +121,9 @@ static struct my_option my_long_options[] = "If you are using this option with CHECK TABLE, it will ensure that the table is 100 percent consistent, but will take a long time. If you are using this option with REPAIR TABLE, it will force using old slow repair with keycache method, instead of much faster repair by sorting.", &opt_extended, &opt_extended, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"flush", OPT_FLUSH_TABLES, "Flush each table after check. This is useful if you don't want to have the checked tables take up space in the caches after the check", + &opt_flush_tables, &opt_flush_tables, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, + 0, 0 }, {"help", '?', "Display this help message and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"host",'h', "Connect to host.", ¤t_host, @@ -685,6 +688,7 @@ static int disable_binlog() static int handle_request_for_tables(char *tables, uint length) { char *query, *end, options[100], message[100]; + char table_name_buff[NAME_CHAR_LEN*2*2+1], *table_name; uint query_length= 0; const char *op = 0; @@ -723,13 +727,17 @@ static int handle_request_for_tables(char *tables, uint length) /* No backticks here as we added them before */ query_length= my_sprintf(query, (query, "%s TABLE %s %s", op, tables, options)); + table_name= tables; } else { - char *ptr; + char *ptr, *org; - ptr= strmov(strmov(query, op), " TABLE "); + org= ptr= strmov(strmov(query, op), " TABLE "); ptr= fix_table_name(ptr, tables); + strmake(table_name_buff, org, min((int) sizeof(table_name_buff)-1, + (int) (ptr - org))); + table_name= table_name_buff; ptr= strxmov(ptr, " ", options, NullS); query_length= (uint) (ptr - query); } @@ -737,9 +745,21 @@ static int handle_request_for_tables(char *tables, uint length) { sprintf(message, "when executing '%s TABLE ... %s'", op, options); DBerror(sock, message); + my_free(query, MYF(0)); return 1; } print_result(); + if (opt_flush_tables) + { + query_length= my_sprintf(query, + (query, "FLUSH TABLES %s", table_name)); + if (mysql_real_query(sock, query, query_length)) + { + DBerror(sock, query); + my_free(query, MYF(0)); + return 1; + } + } my_free(query, MYF(0)); return 0; } diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index b91f4f12d73..8d11abc8a37 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -4943,14 +4943,13 @@ sub mysqld_arguments ($$$) { if ( $opt_valgrind_mysqld ) { - mtr_add_arg($args, "--skip-safemalloc"); - if ( $mysql_version_id < 50100 ) { mtr_add_arg($args, "--skip-bdb"); } } + mtr_add_arg($args, "--loose-skip-safemalloc"); mtr_add_arg($args, "%s--disable-sync-frm"); if (!using_extern() and $mysql_version_id >= 50106 && !$opt_user_args) diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 9e39c8eec8b..406d20f073d 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -2456,7 +2456,7 @@ bool ha_partition::read_par_file(const char *name) fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT); /* Following could be done with my_stat to read in whole file */ - if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0) + if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(MY_WME))) < 0) DBUG_RETURN(true); if (my_read(file, (uchar *) & buff[0], PAR_WORD_SIZE, MYF(MY_NABP))) goto err1; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 3fee65fe963..2a5ce27d649 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -427,7 +427,7 @@ static bool volatile ready_to_exit; static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; static my_bool opt_short_log_format= 0; static my_bool opt_ignore_wrong_options= 0, opt_expect_abort= 0; -static my_bool opt_sync= 0; +static my_bool opt_sync= 0, sf_malloc_trough_check= 0; static uint kill_cached_threads, wake_thread; ulong thread_created; uint thread_handling; @@ -5944,7 +5944,7 @@ enum options_mysqld OPT_NDB_REPORT_THRESH_BINLOG_EPOCH_SLIP, OPT_NDB_REPORT_THRESH_BINLOG_MEM_USAGE, OPT_NDB_USE_COPYING_ALTER_TABLE, - OPT_SKIP_SAFEMALLOC, OPT_MUTEX_DEADLOCK_DETECTOR, + OPT_SAFEMALLOC, OPT_MUTEX_DEADLOCK_DETECTOR, OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE, OPT_SKIP_SYMLINKS, OPT_MAX_BINLOG_DUMP_EVENTS, OPT_SPORADIC_BINLOG_DUMP_FAIL, @@ -6898,12 +6898,11 @@ each time the SQL thread starts.", {"skip-networking", OPT_SKIP_NETWORKING, "Don't allow connection with TCP/IP.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, -#ifndef DBUG_OFF -#ifdef SAFEMALLOC - {"skip-safemalloc", OPT_SKIP_SAFEMALLOC, - "Don't use the memory allocation checking.", 0, 0, 0, GET_NO_ARG, NO_ARG, - 0, 0, 0, 0, 0, 0}, -#endif +#if !defined(DBUG_OFF) && defined(SAFEMALLOC) + {"safemalloc", OPT_SAFEMALLOC, + "Check all memory allocation for every malloc/free call.", + &sf_malloc_trough_check, &sf_malloc_trough_check, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, #endif {"skip-show-database", OPT_SKIP_SHOW_DB, "Don't allow 'SHOW DATABASE' commands.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, @@ -9238,11 +9237,6 @@ mysqld_get_one_option(int optid, } strmake(ft_boolean_syntax, argument, sizeof(ft_boolean_syntax)-1); break; - case OPT_SKIP_SAFEMALLOC: -#ifdef SAFEMALLOC - sf_malloc_quick=1; -#endif - break; case OPT_LOWER_CASE_TABLE_NAMES: lower_case_table_names= argument ? atoi(argument) : 1; lower_case_table_names_used= 1; @@ -9436,6 +9430,9 @@ static int get_options(int *argc,char **argv) &global_system_variables.datetime_format)) return 1; +#ifdef SAFEMALLOC + sf_malloc_quick= !sf_malloc_trough_check; +#endif #ifdef EMBEDDED_LIBRARY one_thread_scheduler(&thread_scheduler); one_thread_scheduler(&extra_thread_scheduler); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index e8b94d11cd2..e4169b1da62 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1540,7 +1540,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { char *end= buff + length; length+= my_snprintf(end, buff_len - length - 1, - end," Memory in use: %ldK Max memory used: %ldK", + " Memory in use: %ldK Max memory used: %ldK", (sf_malloc_cur_memory+1023L)/1024L, (sf_malloc_max_memory+1023L)/1024L); } diff --git a/storage/archive/archive_reader.c b/storage/archive/archive_reader.c index 0cf795cefdf..90220f26f6d 100644 --- a/storage/archive/archive_reader.c +++ b/storage/archive/archive_reader.c @@ -93,12 +93,16 @@ int main(int argc, char *argv[]) printf("\tFRM length %u\n", reader_handle.frm_length); if (reader_handle.comment_start_pos) { - char *comment = - (char *) malloc(sizeof(char) * reader_handle.comment_length); - azread_comment(&reader_handle, comment); - printf("\tComment length %u\n\t\t%.*s\n", reader_handle.comment_length, - reader_handle.comment_length, comment); - free(comment); + char *comment = (char *) my_malloc(reader_handle.comment_length, + MYF(MY_WME)); + if (comment) + { + azread_comment(&reader_handle, comment); + printf("\tComment length %u\n\t\t%.*s\n", + reader_handle.comment_length, + reader_handle.comment_length, comment); + my_free(comment,MYF(0)); + } } } else @@ -180,7 +184,7 @@ int main(int argc, char *argv[]) azio_stream writer_handle; - buffer= (char *)malloc(reader_handle.longest_row); + buffer= (char *) my_malloc(reader_handle.longest_row, MYF(0)); if (buffer == NULL) { printf("Could not allocate memory for row %llu\n", row_count); @@ -251,7 +255,7 @@ int main(int argc, char *argv[]) break; } - free(buffer); + my_free(buffer, MYF(0)); azclose(&writer_handle); } diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index d70de0dd13c..730d5b95abb 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -835,7 +835,7 @@ int ha_archive::write_row(uchar *buf) if (!share->archive_write_open) if (init_archive_writer()) - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + DBUG_RETURN(errno); if (table->next_number_field && record == table->record[0]) @@ -1020,7 +1020,8 @@ int ha_archive::rnd_init(bool scan) if (share->crashed) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - init_archive_reader(); + if (init_archive_reader()) + DBUG_RETURN(errno); /* We rewind the file so that we can read from the beginning if scan */ if (scan) @@ -1317,7 +1318,8 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) char* frm_string; DBUG_ENTER("ha_archive::optimize"); - init_archive_reader(); + if (init_archive_reader()) + DBUG_RETURN(errno); // now we close both our writer and our reader for the rename if (share->archive_write_open) @@ -1326,7 +1328,7 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) share->archive_write_open= FALSE; } - if (!(frm_string= (char*) malloc(archive.frm_length))) + if (!(frm_string= (char*) my_malloc(archive.frm_length, MYF(0)))) return ENOMEM; azread_frm(&archive, frm_string); @@ -1337,12 +1339,12 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) if (!(azopen(&writer, writer_filename, O_CREAT|O_RDWR|O_BINARY))) { - free(frm_string); + my_free(frm_string, MYF(0)); DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); } rc= azwrite_frm(&writer, frm_string, archive.frm_length); - free(frm_string); + my_free(frm_string, MYF(0)); if (rc) { rc= HA_ERR_CRASHED_ON_USAGE; @@ -1547,7 +1549,9 @@ int ha_archive::info(uint flag) if (flag & HA_STATUS_AUTO) { - init_archive_reader(); + if (init_archive_reader()) + DBUG_RETURN(errno); + pthread_mutex_lock(&share->mutex); azflush(&archive, Z_SYNC_FLUSH); pthread_mutex_unlock(&share->mutex); @@ -1626,7 +1630,9 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) Now we will rewind the archive file so that we are positioned at the start of the file. */ - init_archive_reader(); + if (init_archive_reader()) + DBUG_RETURN(errno); + read_data_header(&archive); while (!(rc= get_row(&archive, table->record[0]))) count--; From 6d6bde664590e1a4fdbf60afddaeb21636c1f7e4 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sat, 11 Jun 2011 14:28:37 +0300 Subject: [PATCH 010/359] Increased server version to 5.2.7 --- configure.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.in b/configure.in index febbf1d14ea..c9b097d2f84 100644 --- a/configure.in +++ b/configure.in @@ -13,7 +13,7 @@ dnl When changing the major version number please also check the switch dnl statement in mysqlbinlog::check_master_version(). You may also need dnl to update version.c in ndb. -AC_INIT([MariaDB Server], [5.2.6-MariaDB], [], [mysql]) +AC_INIT([MariaDB Server], [5.2.7-MariaDB], [], [mysql]) AC_CONFIG_SRCDIR([sql/mysqld.cc]) AC_CANONICAL_SYSTEM From ff0b30219f8b47b70f14211c1180148de3dee4d1 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sat, 11 Jun 2011 14:53:08 +0300 Subject: [PATCH 011/359] Updated to new error messages for partitions when .par file is missing --- mysql-test/r/partition_error.result | 5 +++-- mysql-test/t/partition_error.test | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mysql-test/r/partition_error.result b/mysql-test/r/partition_error.result index 269b6875430..a14b1280eb7 100644 --- a/mysql-test/r/partition_error.result +++ b/mysql-test/r/partition_error.result @@ -657,15 +657,16 @@ CREATE TABLE t1 (a INT) PARTITION BY HASH (a); FLUSH TABLES; CHECK TABLE t1; Table Op Msg_type Msg_text +test.t1 check Error File './test/t1.par' not found (Errcode: 2) test.t1 check Error Failed to read from the .par file test.t1 check Error Incorrect information in file: './test/t1.frm' test.t1 check error Corrupt SELECT * FROM t1; -ERROR HY000: Failed to read from the .par file +ERROR HY000: File './test/t1.par' not found (Errcode: 2) # Note that it is currently impossible to drop a partitioned table # without the .par file DROP TABLE t1; -ERROR 42S02: Unknown table 't1' +ERROR HY000: File './test/t1.par' not found (Errcode: 2) # # Bug#49477: Assertion `0' failed in ha_partition.cc:5530 # with temporary table and partitions diff --git a/mysql-test/t/partition_error.test b/mysql-test/t/partition_error.test index 7e574fd6a42..81bb7b55cec 100644 --- a/mysql-test/t/partition_error.test +++ b/mysql-test/t/partition_error.test @@ -695,12 +695,15 @@ CREATE TABLE t1 (a INT) PARTITION BY HASH (a); FLUSH TABLES; --remove_file $MYSQLD_DATADIR/test/t1.par --replace_result $MYSQLD_DATADIR ./ +--replace_result \\ / CHECK TABLE t1; ---error ER_UNKNOWN_ERROR +--replace_result \\ / +--error 29 SELECT * FROM t1; --echo # Note that it is currently impossible to drop a partitioned table --echo # without the .par file ---error ER_BAD_TABLE_ERROR +--replace_result \\ / +--error 29 DROP TABLE t1; --remove_file $MYSQLD_DATADIR/test/t1.frm --remove_file $MYSQLD_DATADIR/test/t1#P#p0.MYI From 2f6c43c5a02482ba92f2d463a6f5bf6383f0ec51 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 12 Jun 2011 12:52:51 +0300 Subject: [PATCH 012/359] Fixed warning that sf_malloc_trough_check was not used when compiling without SAFEMALLOC --- sql/mysqld.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2a5ce27d649..51824d23a14 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -427,7 +427,7 @@ static bool volatile ready_to_exit; static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; static my_bool opt_short_log_format= 0; static my_bool opt_ignore_wrong_options= 0, opt_expect_abort= 0; -static my_bool opt_sync= 0, sf_malloc_trough_check= 0; +static my_bool opt_sync= 0; static uint kill_cached_threads, wake_thread; ulong thread_created; uint thread_handling; @@ -444,6 +444,9 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," #else ; #endif +#ifdef SAFEMALLOC +my_bool sf_malloc_trough_check= 0; +#endif static char *mysqld_user, *mysqld_chroot, *log_error_file_ptr; static char *opt_init_slave, *language_ptr, *opt_init_connect; static char *default_character_set_name; @@ -6898,7 +6901,7 @@ each time the SQL thread starts.", {"skip-networking", OPT_SKIP_NETWORKING, "Don't allow connection with TCP/IP.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, -#if !defined(DBUG_OFF) && defined(SAFEMALLOC) +#ifdef SAFEMALLOC {"safemalloc", OPT_SAFEMALLOC, "Check all memory allocation for every malloc/free call.", &sf_malloc_trough_check, &sf_malloc_trough_check, 0, From 4171483b539555f50336d4304d931ef743cf7011 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 15:52:07 +0200 Subject: [PATCH 013/359] Backport Fix for Bug#24509 - 2048 file descriptor limit on windows needs increasing. The patch replaces the use of the POSIX I/O interfaces in mysys on Windows with the Win32 API calls (CreateFile, WriteFile, etc). The Windows HANDLE for the open file is stored in the my_file_info struct, along with a flag for append mode (because the Windows API does not support opening files in append mode in all cases) The default max open files has been increased to 16384 and can be increased further by setting --max-open-files= during the server start. Noteworthy benefit of this patch is that it removes limits from the table_cache size - allowing for more simultaneus users --- client/mysqlbinlog.cc | 2 +- client/mysqlslap.c | 2 +- client/readline.cc | 2 +- include/config-win.h | 3 +- include/my_dir.h | 4 + include/my_global.h | 41 +- include/my_sys.h | 23 +- libmysql/CMakeLists.txt | 1 + mysys/CMakeLists.txt | 2 +- mysys/default_modify.c | 8 +- mysys/mf_iocache.c | 3 - mysys/my_chsize.c | 19 +- mysys/my_create.c | 15 +- mysys/my_dup.c | 6 +- mysys/my_file.c | 1 + mysys/my_fopen.c | 48 +- mysys/my_fstream.c | 17 +- mysys/my_init.c | 3 +- mysys/my_lib.c | 26 +- mysys/my_lock.c | 137 ++++- mysys/my_mmap.c | 6 +- mysys/my_open.c | 227 +------ mysys/my_pread.c | 81 +-- mysys/my_quick.c | 22 +- mysys/my_read.c | 7 +- mysys/my_seek.c | 17 +- mysys/my_static.c | 2 +- mysys/my_sync.c | 2 +- mysys/my_winerr.c | 123 ++++ mysys/my_winfile.c | 681 +++++++++++++++++++++ mysys/my_write.c | 40 +- mysys/mysys_priv.h | 24 + sql/discover.cc | 2 +- sql/item_strfunc.cc | 6 +- storage/archive/ha_archive.cc | 6 +- storage/innobase/handler/ha_innodb.cc | 22 + storage/innodb_plugin/handler/ha_innodb.cc | 25 +- storage/maria/ma_extra.c | 3 +- storage/myisam/mi_locking.c | 8 +- storage/xtradb/handler/ha_innodb.cc | 25 +- 40 files changed, 1268 insertions(+), 424 deletions(-) create mode 100644 mysys/my_winerr.c create mode 100644 mysys/my_winfile.c diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 5d458090352..59e89f02128 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -2057,7 +2057,7 @@ static Exit_status dump_local_log_entries(PRINT_EVENT_INFO *print_event_info, return ERROR_STOP; } #endif - if (init_io_cache(file, fileno(stdin), 0, READ_CACHE, (my_off_t) 0, + if (init_io_cache(file, my_fileno(stdin), 0, READ_CACHE, (my_off_t) 0, 0, MYF(MY_WME | MY_NABP | MY_DONT_CHECK_FILESIZE))) { error("Failed to init IO cache."); diff --git a/client/mysqlslap.c b/client/mysqlslap.c index c38380c7306..de1992e2d57 100644 --- a/client/mysqlslap.c +++ b/client/mysqlslap.c @@ -1223,7 +1223,7 @@ get_options(int *argc,char ***argv) if (opt_csv_str[0] == '-') { - csv_file= fileno(stdout); + csv_file= my_fileno(stdout); } else { diff --git a/client/readline.cc b/client/readline.cc index 4edccebef39..5293f7546e4 100644 --- a/client/readline.cc +++ b/client/readline.cc @@ -43,7 +43,7 @@ LINE_BUFFER *batch_readline_init(ulong max_size,FILE *file) if (!(line_buff=(LINE_BUFFER*) my_malloc(sizeof(*line_buff),MYF(MY_WME | MY_ZEROFILL)))) return 0; - if (init_line_buffer(line_buff,fileno(file),IO_SIZE,max_size)) + if (init_line_buffer(line_buff,my_fileno(file),IO_SIZE,max_size)) { my_free(line_buff,MYF(0)); return 0; diff --git a/include/config-win.h b/include/config-win.h index 6d12bb0e33f..3fd2c6c3a43 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -66,7 +66,6 @@ #endif /* File and lock constants */ -#define O_SHARE 0x1000 /* Open file in sharing mode */ #ifdef __BORLANDC__ #define F_RDLCK LK_NBLCK /* read lock */ #define F_WRLCK LK_NBRLCK /* write lock */ @@ -374,7 +373,7 @@ inline ulonglong double2ulonglong(double d) #define FN_DEVCHAR ':' #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */ -#define OS_FILE_LIMIT 2048 +#define OS_FILE_LIMIT UINT_MAX #define DO_NOT_REMOVE_THREAD_WRAPPERS #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) diff --git a/include/my_dir.h b/include/my_dir.h index 06509a3af19..90d708ac811 100644 --- a/include/my_dir.h +++ b/include/my_dir.h @@ -69,7 +69,11 @@ typedef struct my_stat #else +#if(_MSC_VER) +#define MY_STAT struct _stati64 /* 64 bit file size */ +#else #define MY_STAT struct stat /* Orginal struct have what we need */ +#endif #endif /* USE_MY_STAT_STRUCT */ diff --git a/include/my_global.h b/include/my_global.h index 8b71410dbf0..60c53ac937c 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -784,7 +784,41 @@ typedef SOCKET_SIZE_TYPE size_socket; #define FN_DIRSEP "/" /* Valid directory separators */ #define FN_ROOTDIR "/" #endif -#define MY_NFILE 64 /* This is only used to save filenames */ + +/* + MY_FILE_MIN is Windows speciality and is used to quickly detect + the mismatch of CRT and mysys file IO usage on Windows at runtime. + CRT file descriptors can be in the range 0-2047, whereas descriptors returned + by my_open() will start with 2048. If a file descriptor with value less then + MY_FILE_MIN is passed to mysys IO function, chances are it stemms from + open()/fileno() and not my_open()/my_fileno. + + For Posix, mysys functions are light wrappers around libc, and MY_FILE_MIN + is logically 0. +*/ + +#ifdef _WIN32 +#define MY_FILE_MIN 2048 +#else +#define MY_FILE_MIN 0 +#endif + +/* + MY_NFILE is the default size of my_file_info array. + + It is larger on Windows, because it all file handles are stored in my_file_info + Default size is 16384 and this should be enough for most cases.If it is not + enough, --max-open-files with larger value can be used. + + For Posix , my_file_info array is only used to store filenames for + error reporting and its size is not a limitation for number of open files. +*/ +#ifdef _WIN32 +#define MY_NFILE (16384 + MY_FILE_MIN) +#else +#define MY_NFILE 64 +#endif + #ifndef OS_FILE_LIMIT #define OS_FILE_LIMIT UINT_MAX #endif @@ -821,9 +855,8 @@ typedef SOCKET_SIZE_TYPE size_socket; /* Some things that this system doesn't have */ #define NO_HASH /* Not needed anymore */ -#ifdef __WIN__ -#define NO_DIR_LIBRARY /* Not standar dir-library */ -#define USE_MY_STAT_STRUCT /* For my_lib */ +#ifdef _WIN32 +#define NO_DIR_LIBRARY /* Not standard dir-library */ #endif /* Some defines of functions for portability */ diff --git a/include/my_sys.h b/include/my_sys.h index d391492983d..3368236694d 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -342,11 +342,12 @@ enum file_type struct st_my_file_info { - char * name; - enum file_type type; -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_t mutex; + char *name; +#ifdef _WIN32 + HANDLE fhandle; /* win32 file handle */ + int oflag; /* open flags, e.g O_APPEND */ #endif + enum file_type type; }; extern struct st_my_file_info *my_file_info; @@ -650,12 +651,12 @@ extern void *my_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); -#ifdef __WIN__ -extern int my_access(const char *path, int amode); -extern File my_sopen(const char *path, int oflag, int shflag, int pmode); +#ifdef _WIN32 +extern int my_access(const char *path, int amode); #else #define my_access access #endif + extern int check_if_legal_filename(const char *path); extern int check_if_legal_tablename(const char *path); @@ -666,6 +667,13 @@ extern int nt_share_delete(const char *name,myf MyFlags); #define my_delete_allow_opened(fname,flags) my_delete((fname),(flags)) #endif +#ifdef _WIN32 +/* Windows-only functions (CRT equivalents)*/ +extern File my_sopen(const char *path, int oflag, int shflag, int pmode); +extern HANDLE my_get_osfhandle(File fd); +extern void my_osmaperr(unsigned long last_error); +#endif + #ifndef TERMINATE extern void TERMINATE(FILE *file, uint flag); #endif @@ -675,6 +683,7 @@ extern FILE *my_fopen(const char *FileName,int Flags,myf MyFlags); extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern FILE *my_freopen(const char *path, const char *mode, FILE *stream); extern int my_fclose(FILE *fd,myf MyFlags); +extern File my_fileno(FILE *fd); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_chmod(const char *name, mode_t mode, myf my_flags); extern int my_sync(File fd, myf my_flags); diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index 4ac0b9a01ee..7618a8367de 100755 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -87,6 +87,7 @@ SET(CLIENT_SOURCES ../mysys/array.c ../strings/bchange.c ../strings/bmove.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c + ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_winfile.c ../mysys/my_winerr.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c ../mysys/safemalloc.c ../mysys/sha1.c ../strings/str2int.c ../strings/str_alloc.c ../strings/strcend.c ../strings/strcont.c ../strings/strend.c diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 9ab19222caf..0cf10e6e993 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -38,7 +38,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ my_mkdir.c my_mmap.c my_net.c my_once.c my_open.c my_pread.c my_pthread.c my_quick.c my_read.c my_realloc.c my_redel.c my_rename.c my_seek.c my_sleep.c my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c my_wincond.c - my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c stacktrace.c + my_winerr.c my_winfile.c my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c stacktrace.c rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c lf_alloc-pin.c lf_dynarray.c lf_hash.c diff --git a/mysys/default_modify.c b/mysys/default_modify.c index 88df0122da2..ccbf47176a6 100644 --- a/mysys/default_modify.c +++ b/mysys/default_modify.c @@ -21,7 +21,7 @@ #define BUFF_SIZE 1024 #define RESERVE 1024 /* Extend buffer with this extent */ -#ifdef __WIN__ +#ifdef _WIN32 #define NEWLINE "\r\n" #define NEWLINE_LEN 2 #else @@ -78,7 +78,7 @@ int modify_defaults_file(const char *file_location, const char *option, DBUG_RETURN(2); /* my_fstat doesn't use the flag parameter */ - if (my_fstat(fileno(cnf_file), &file_stat, MYF(0))) + if (my_fstat(my_fileno(cnf_file), &file_stat, MYF(0))) goto malloc_err; if (option && option_value) @@ -96,7 +96,7 @@ int modify_defaults_file(const char *file_location, const char *option, NEWLINE_LEN + /* Space for newline */ RESERVE); /* Some additional space */ - buffer_size= (file_stat.st_size + + buffer_size= (uint)(file_stat.st_size + 1); /* The ending zero */ /* @@ -213,7 +213,7 @@ int modify_defaults_file(const char *file_location, const char *option, if (opt_applied) { /* Don't write the file if there are no changes to be made */ - if (my_chsize(fileno(cnf_file), (my_off_t) (dst_ptr - file_buffer), 0, + if (my_chsize(my_fileno(cnf_file), (my_off_t) (dst_ptr - file_buffer), 0, MYF(MY_WME)) || my_fseek(cnf_file, 0, MY_SEEK_SET, MYF(0)) || my_fwrite(cnf_file, (uchar*) file_buffer, (size_t) (dst_ptr - file_buffer), diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 3824669365f..1905437cc0a 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1659,9 +1659,6 @@ int my_block_write(register IO_CACHE *info, const uchar *Buffer, size_t Count, Buffer+=length; pos+= length; Count-= length; -#ifndef HAVE_PREAD - info->seek_not_done=1; -#endif } /* Check if we want to write inside the used part of the buffer.*/ diff --git a/mysys/my_chsize.c b/mysys/my_chsize.c index b1dbb22c687..b9013811b34 100644 --- a/mysys/my_chsize.c +++ b/mysys/my_chsize.c @@ -52,20 +52,13 @@ int my_chsize(File fd, my_off_t newlength, int filler, myf MyFlags) if (oldsize > newlength) { -#if defined(HAVE_SETFILEPOINTER) - /* This is for the moment only true on windows */ - long is_success; - HANDLE win_file= (HANDLE) _get_osfhandle(fd); - long length_low, length_high; - length_low= (long) (ulong) newlength; - length_high= (long) ((ulonglong) newlength >> 32); - is_success= SetFilePointer(win_file, length_low, &length_high, FILE_BEGIN); - if (is_success == -1 && (my_errno= GetLastError()) != NO_ERROR) +#ifdef _WIN32 + if (my_win_chsize(fd, newlength)) + { + my_errno= errno; goto err; - if (SetEndOfFile(win_file)) - DBUG_RETURN(0); - my_errno= GetLastError(); - goto err; + } + DBUG_RETURN(0); #elif defined(HAVE_FTRUNCATE) if (ftruncate(fd, (off_t) newlength)) { diff --git a/mysys/my_create.c b/mysys/my_create.c index 5c9a1e027d2..d0436276d03 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -18,7 +18,7 @@ #include "mysys_err.h" #include #include -#if defined(__WIN__) +#if defined(_WIN32) #include #endif @@ -41,16 +41,12 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, FileName, CreateFlags, access_flags, MyFlags)); #if !defined(NO_OPEN_3) - fd = open((char *) FileName, access_flags | O_CREAT, + fd= open((char *) FileName, access_flags | O_CREAT, CreateFlags ? CreateFlags : my_umask); -#elif defined(VMS) - fd = open((char *) FileName, access_flags | O_CREAT, 0, - "ctx=stm","ctx=bin"); -#elif defined(__WIN__) - fd= my_sopen((char *) FileName, access_flags | O_CREAT | O_BINARY, - SH_DENYNO, MY_S_IREAD | MY_S_IWRITE); +#elif defined(_WIN32) + fd= my_win_open(FileName, access_flags | O_CREAT); #else - fd = open(FileName, access_flags); + fd= open(FileName, access_flags); #endif if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && @@ -71,6 +67,7 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, if (unlikely(fd >= 0 && rc < 0)) { int tmp= my_errno; + my_close(fd, MyFlags); my_delete(FileName, MyFlags); my_errno= tmp; } diff --git a/mysys/my_dup.c b/mysys/my_dup.c index 55f5e0c0099..5fdd6e9f364 100644 --- a/mysys/my_dup.c +++ b/mysys/my_dup.c @@ -29,7 +29,11 @@ File my_dup(File file, myf MyFlags) const char *filename; DBUG_ENTER("my_dup"); DBUG_PRINT("my",("file: %d MyFlags: %d", file, MyFlags)); - fd = dup(file); +#ifdef _WIN32 + fd= my_win_dup(file); +#else + fd= dup(file); +#endif filename= (((uint) file < my_file_limit) ? my_file_info[(int) file].name : "Unknown"); DBUG_RETURN(my_register_filename(fd, filename, FILE_BY_DUP, diff --git a/mysys/my_file.c b/mysys/my_file.c index 594f361437f..d8d51b91ab2 100644 --- a/mysys/my_file.c +++ b/mysys/my_file.c @@ -97,6 +97,7 @@ uint my_set_max_open_files(uint files) DBUG_ENTER("my_set_max_open_files"); DBUG_PRINT("enter",("files: %u my_file_limit: %u", files, my_file_limit)); + files+= MY_FILE_MIN; files= set_max_open_files(min(files, OS_FILE_LIMIT)); if (files <= MY_NFILE) DBUG_RETURN(files); diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c index a4b0c9f895d..ae631a59353 100644 --- a/mysys/my_fopen.c +++ b/mysys/my_fopen.c @@ -46,24 +46,14 @@ FILE *my_fopen(const char *filename, int flags, myf MyFlags) DBUG_ENTER("my_fopen"); DBUG_PRINT("my",("Name: '%s' flags: %d MyFlags: %d", filename, flags, MyFlags)); - /* - if we are not creating, then we need to use my_access to make sure - the file exists since Windows doesn't handle files like "com1.sym" - very well - */ -#ifdef __WIN__ - if (check_if_legal_filename(filename)) - { - errno= EACCES; - fd= 0; - } - else + + make_ftype(type,flags); + +#ifdef _WIN32 + fd= my_win_fopen(filename, type); +#else + fd= fopen(filename, type); #endif - { - make_ftype(type,flags); - fd = fopen(filename, type); - } - if (fd != 0) { /* @@ -71,18 +61,20 @@ FILE *my_fopen(const char *filename, int flags, myf MyFlags) on some OS (SUNOS). Actually the filename save isn't that important so we can ignore if this doesn't work. */ - if ((uint) fileno(fd) >= my_file_limit) + + int filedesc= my_fileno(fd); + if ((uint)filedesc >= my_file_limit) { thread_safe_increment(my_stream_opened,&THR_LOCK_open); DBUG_RETURN(fd); /* safeguard */ } pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fileno(fd)].name = (char*) + if ((my_file_info[filedesc].name= (char*) my_strdup(filename,MyFlags))) { my_stream_opened++; my_file_total_opened++; - my_file_info[fileno(fd)].type = STREAM_BY_FOPEN; + my_file_info[filedesc].type= STREAM_BY_FOPEN; pthread_mutex_unlock(&THR_LOCK_open); DBUG_PRINT("exit",("stream: 0x%lx", (long) fd)); DBUG_RETURN(fd); @@ -240,8 +232,13 @@ int my_fclose(FILE *fd, myf MyFlags) DBUG_PRINT("my",("stream: 0x%lx MyFlags: %d", (long) fd, MyFlags)); pthread_mutex_lock(&THR_LOCK_open); - file=fileno(fd); - if ((err = fclose(fd)) < 0) + file= my_fileno(fd); +#ifndef _WIN32 + err= fclose(fd); +#else + err= my_win_fclose(fd); +#endif + if(err < 0) { my_errno=errno; if (MyFlags & (MY_FAE | MY_WME)) @@ -272,7 +269,12 @@ FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags) Filedes, Flags, MyFlags)); make_ftype(type,Flags); - if ((fd = fdopen(Filedes, type)) == 0) +#ifdef _WIN32 + fd= my_win_fdopen(Filedes, type); +#else + fd= fdopen(Filedes, type); +#endif + if (!fd) { my_errno=errno; if (MyFlags & (MY_FAE | MY_WME)) diff --git a/mysys/my_fstream.c b/mysys/my_fstream.c index f3b5418b906..2059e1a9f18 100644 --- a/mysys/my_fstream.c +++ b/mysys/my_fstream.c @@ -56,11 +56,11 @@ size_t my_fread(FILE *stream, uchar *Buffer, size_t Count, myf MyFlags) { if (ferror(stream)) my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); else if (MyFlags & (MY_NABP | MY_FNABP)) my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); } my_errno=errno ? errno : -1; if (ferror(stream) || MyFlags & (MY_NABP | MY_FNABP)) @@ -142,7 +142,7 @@ size_t my_fwrite(FILE *stream, const uchar *Buffer, size_t Count, myf MyFlags) if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { my_error(EE_WRITE, MYF(ME_BELL+ME_WAITTANG), - my_filename(fileno(stream)),errno); + my_filename(my_fileno(stream)),errno); } writtenbytes= (size_t) -1; /* Return that we got error */ break; @@ -182,3 +182,14 @@ my_off_t my_ftell(FILE *stream, myf MyFlags __attribute__((unused))) DBUG_PRINT("exit",("ftell: %lu",(ulong) pos)); DBUG_RETURN((my_off_t) pos); } /* my_ftell */ + + +/* Get a File corresponding to the stream*/ +int my_fileno(FILE *f) +{ +#ifdef _WIN32 + return my_win_fileno(f); +#else + return fileno(f); +#endif +} diff --git a/mysys/my_init.c b/mysys/my_init.c index e7ab9ba7a1f..7f174cea1f0 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -274,6 +274,7 @@ void my_parameter_handler(const wchar_t * expression, const wchar_t * function, { DBUG_PRINT("my",("Expression: %s function: %s file: %s, line: %d", expression, function, file, line)); + __debugbreak(); } @@ -298,7 +299,7 @@ int handle_rtc_failure(int err_type, const char *file, int line, fprintf(stderr, " At %s:%d\n", file, line); va_end(args); (void) fflush(stderr); - + __debugbreak(); return 0; /* Error is handled */ } #pragma runtime_checks("", restore) diff --git a/mysys/my_lib.c b/mysys/my_lib.c index c18d14fb549..033f8789b49 100644 --- a/mysys/my_lib.c +++ b/mysys/my_lib.c @@ -35,8 +35,7 @@ # if defined(HAVE_NDIR_H) # include # endif -# if defined(__WIN__) -# include +# if defined(_WIN32) # ifdef __BORLANDC__ # include # endif @@ -92,7 +91,7 @@ static int comp_names(struct fileinfo *a, struct fileinfo *b) } /* comp_names */ -#if !defined(__WIN__) +#if !defined(_WIN32) MY_DIR *my_dir(const char *path, myf MyFlags) { @@ -507,19 +506,24 @@ error: DBUG_RETURN((MY_DIR *) NULL); } /* my_dir */ -#endif /* __WIN__ */ +#endif /* _WIN32 */ /**************************************************************************** ** File status ** Note that MY_STAT is assumed to be same as struct stat ****************************************************************************/ -int my_fstat(int Filedes, MY_STAT *stat_area, + +int my_fstat(File Filedes, MY_STAT *stat_area, myf MyFlags __attribute__((unused))) { DBUG_ENTER("my_fstat"); DBUG_PRINT("my",("fd: %d MyFlags: %d", Filedes, MyFlags)); +#ifdef _WIN32 + DBUG_RETURN(my_win_fstat(Filedes, stat_area)); +#else DBUG_RETURN(fstat(Filedes, (struct stat *) stat_area)); +#endif } @@ -531,11 +535,15 @@ MY_STAT *my_stat(const char *path, MY_STAT *stat_area, myf my_flags) (long) stat_area, my_flags)); if ((m_used= (stat_area == NULL))) - if (!(stat_area = (MY_STAT *) my_malloc(sizeof(MY_STAT), my_flags))) + if (!(stat_area= (MY_STAT *) my_malloc(sizeof(MY_STAT), my_flags))) goto error; - if (! stat((char *) path, (struct stat *) stat_area) ) - DBUG_RETURN(stat_area); - +#ifndef _WIN32 + if (! stat((char *) path, (struct stat *) stat_area) ) + DBUG_RETURN(stat_area); +#else + if (! my_win_stat(path, stat_area) ) + DBUG_RETURN(stat_area); +#endif DBUG_PRINT("error",("Got errno: %d from stat", errno)); my_errno= errno; if (m_used) /* Free if new area */ diff --git a/mysys/my_lock.c b/mysys/my_lock.c index 8450fcfc30a..6bbb177e4b6 100644 --- a/mysys/my_lock.c +++ b/mysys/my_lock.c @@ -22,13 +22,113 @@ #undef NO_ALARM_LOOP #endif #include -#ifdef __WIN__ -#include -#endif #ifdef __NETWARE__ #include #endif +#ifdef _WIN32 +#define WIN_LOCK_INFINITE -1 +#define WIN_LOCK_SLEEP_MILLIS 100 + +static int win_lock(File fd, int locktype, my_off_t start, my_off_t length, + int timeout_sec) +{ + LARGE_INTEGER liOffset,liLength; + DWORD dwFlags; + OVERLAPPED ov= {0}; + HANDLE hFile= (HANDLE)my_get_osfhandle(fd); + DWORD lastError= 0; + int i; + int timeout_millis= timeout_sec * 1000; + + DBUG_ENTER("win_lock"); + + liOffset.QuadPart= start; + liLength.QuadPart= length; + + ov.Offset= liOffset.LowPart; + ov.OffsetHigh= liOffset.HighPart; + + if (locktype == F_UNLCK) + { + if (UnlockFileEx(hFile, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + /* + For compatibility with fcntl implementation, ignore error, + if region was not locked + */ + if (GetLastError() == ERROR_NOT_LOCKED) + { + SetLastError(0); + DBUG_RETURN(0); + } + goto error; + } + else if (locktype == F_RDLCK) + /* read lock is mapped to a shared lock. */ + dwFlags= 0; + else + /* write lock is mapped to an exclusive lock. */ + dwFlags= LOCKFILE_EXCLUSIVE_LOCK; + + /* + Drop old lock first to avoid double locking. + During analyze of Bug#38133 (Myisamlog test fails on Windows) + I met the situation that the program myisamlog locked the file + exclusively, then additionally shared, then did one unlock, and + then blocked on an attempt to lock it exclusively again. + Unlocking before every lock fixed the problem. + Note that this introduces a race condition. When the application + wants to convert an exclusive lock into a shared one, it will now + first unlock the file and then lock it shared. A waiting exclusive + lock could step in here. For reasons described in Bug#38133 and + Bug#41124 (Server hangs on Windows with --external-locking after + INSERT...SELECT) and in the review thread at + http://lists.mysql.com/commits/60721 it seems to be the better + option than not to unlock here. + If one day someone notices a way how to do file lock type changes + on Windows without unlocking before taking the new lock, please + change this code accordingly to fix the race condition. + */ + if (!UnlockFileEx(hFile, 0, liLength.LowPart, liLength.HighPart, &ov) && + (GetLastError() != ERROR_NOT_LOCKED)) + goto error; + + if (timeout_sec == WIN_LOCK_INFINITE) + { + if (LockFileEx(hFile, dwFlags, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + goto error; + } + + dwFlags|= LOCKFILE_FAIL_IMMEDIATELY; + timeout_millis= timeout_sec * 1000; + /* Try lock in a loop, until the lock is acquired or timeout happens */ + for(i= 0; ;i+= WIN_LOCK_SLEEP_MILLIS) + { + if (LockFileEx(hFile, dwFlags, 0, liLength.LowPart, liLength.HighPart, &ov)) + DBUG_RETURN(0); + + if (GetLastError() != ERROR_LOCK_VIOLATION) + goto error; + + if (i >= timeout_millis) + break; + Sleep(WIN_LOCK_SLEEP_MILLIS); + } + + /* timeout */ + errno= EAGAIN; + DBUG_RETURN(-1); + +error: + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} +#endif + + + /* Lock a part of a file @@ -97,29 +197,16 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, DBUG_RETURN(0); } } -#elif defined(HAVE_LOCKING) - /* Windows */ +#elif defined(_WIN32) { - my_bool error= FALSE; - pthread_mutex_lock(&my_file_info[fd].mutex); - if (MyFlags & MY_SEEK_NOT_DONE) - { - if( my_seek(fd,start,MY_SEEK_SET,MYF(MyFlags & ~MY_SEEK_NOT_DONE)) - == MY_FILEPOS_ERROR ) - { - /* - If my_seek fails my_errno will already contain an error code; - just unlock and return error code. - */ - DBUG_PRINT("error",("my_errno: %d (%d)",my_errno,errno)); - pthread_mutex_unlock(&my_file_info[fd].mutex); - DBUG_RETURN(-1); - } - } - error= locking(fd,locktype,(ulong) length) && errno != EINVAL; - pthread_mutex_unlock(&my_file_info[fd].mutex); - if (!error) - DBUG_RETURN(0); + int timeout_sec; + if (MyFlags & MY_NO_WAIT) + timeout_sec= 0; + else + timeout_sec= WIN_LOCK_INFINITE; + + if(win_lock(fd, locktype, start, length, timeout_sec) == 0) + DBUG_RETURN(0); } #else #if defined(HAVE_FCNTL) diff --git a/mysys/my_mmap.c b/mysys/my_mmap.c index 023a06fd896..303d8efaf30 100644 --- a/mysys/my_mmap.c +++ b/mysys/my_mmap.c @@ -27,17 +27,17 @@ int my_msync(int fd, void *addr, size_t len, int flags) return my_sync(fd, MYF(0)); } -#elif defined(__WIN__) +#elif defined(_WIN32) static SECURITY_ATTRIBUTES mmap_security_attributes= {sizeof(SECURITY_ATTRIBUTES), 0, TRUE}; void *my_mmap(void *addr, size_t len, int prot, - int flags, int fd, my_off_t offset) + int flags, File fd, my_off_t offset) { HANDLE hFileMap; LPVOID ptr; - HANDLE hFile= (HANDLE)_get_osfhandle(fd); + HANDLE hFile= (HANDLE)my_get_osfhandle(fd); if (hFile == INVALID_HANDLE_VALUE) return MAP_FAILED; diff --git a/mysys/my_open.c b/mysys/my_open.c index fe7f65c450b..b1788506832 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -17,9 +17,7 @@ #include "mysys_err.h" #include #include -#if defined(__WIN__) -#include -#endif + /* Open a file @@ -43,29 +41,8 @@ File my_open(const char *FileName, int Flags, myf MyFlags) DBUG_ENTER("my_open"); DBUG_PRINT("my",("Name: '%s' Flags: %d MyFlags: %d", FileName, Flags, MyFlags)); -#if defined(__WIN__) - /* - Check that we don't try to open or create a file name that may - cause problems for us in the future (like PRN) - */ - if (check_if_legal_filename(FileName)) - { - errno= EACCES; - DBUG_RETURN(my_register_filename(-1, FileName, FILE_BY_OPEN, - EE_FILENOTFOUND, MyFlags)); - } -#ifndef __WIN__ - if (Flags & O_SHARE) - fd = sopen((char *) FileName, (Flags & ~O_SHARE) | O_BINARY, SH_DENYNO, - MY_S_IREAD | MY_S_IWRITE); - else - fd = open((char *) FileName, Flags | O_BINARY, - MY_S_IREAD | MY_S_IWRITE); -#else - fd= my_sopen((char *) FileName, (Flags & ~O_SHARE) | O_BINARY, SH_DENYNO, - MY_S_IREAD | MY_S_IWRITE); -#endif - +#if defined(_WIN32) + fd= my_win_open(FileName, Flags); #elif !defined(NO_OPEN_3) fd = open(FileName, Flags, my_umask); /* Normal unix */ #else @@ -94,11 +71,14 @@ int my_close(File fd, myf MyFlags) DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags)); pthread_mutex_lock(&THR_LOCK_open); +#ifndef _WIN32 do { err= close(fd); } while (err == -1 && errno == EINTR); - +#else + err= my_win_close(fd); +#endif if (err) { DBUG_PRINT("error",("Got error %d on close",err)); @@ -109,9 +89,6 @@ int my_close(File fd, myf MyFlags) if ((uint) fd < my_file_limit && my_file_info[fd].type != UNOPEN) { my_free(my_file_info[fd].name, MYF(0)); -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_destroy(&my_file_info[fd].mutex); -#endif my_file_info[fd].type = UNOPEN; } my_file_opened--; @@ -141,16 +118,12 @@ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { DBUG_ENTER("my_register_filename"); - if ((int) fd >= 0) + if ((int) fd >= MY_FILE_MIN) { if ((uint) fd >= my_file_limit) { -#if defined(THREAD) && !defined(HAVE_PREAD) - my_errno= EMFILE; -#else thread_safe_increment(my_file_opened,&THR_LOCK_open); DBUG_RETURN(fd); /* safeguard */ -#endif } else { @@ -160,9 +133,6 @@ File my_register_filename(File fd, const char *FileName, enum file_type my_file_opened++; my_file_total_opened++; my_file_info[fd].type = type_of_file; -#if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); -#endif pthread_mutex_unlock(&THR_LOCK_open); DBUG_PRINT("exit",("fd: %d",fd)); DBUG_RETURN(fd); @@ -187,188 +157,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type DBUG_RETURN(-1); } -#ifdef __WIN__ -extern void __cdecl _dosmaperr(unsigned long); - -/* - Open a file with sharing. Similar to _sopen() from libc, but allows managing - share delete on win32 - - SYNOPSIS - my_sopen() - path fully qualified file name - oflag operation flags - shflag share flag - pmode permission flags - - RETURN VALUE - File descriptor of opened file if success - -1 and sets errno if fails. -*/ - -File my_sopen(const char *path, int oflag, int shflag, int pmode) -{ - int fh; /* handle of opened file */ - int mask; - HANDLE osfh; /* OS handle of opened file */ - DWORD fileaccess; /* OS file access (requested) */ - DWORD fileshare; /* OS file sharing mode */ - DWORD filecreate; /* OS method of opening/creating */ - DWORD fileattrib; /* OS file attribute flags */ - SECURITY_ATTRIBUTES SecurityAttributes; - - SecurityAttributes.nLength= sizeof(SecurityAttributes); - SecurityAttributes.lpSecurityDescriptor= NULL; - SecurityAttributes.bInheritHandle= !(oflag & _O_NOINHERIT); - - /* - * decode the access flags - */ - switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) { - case _O_RDONLY: /* read access */ - fileaccess= GENERIC_READ; - break; - case _O_WRONLY: /* write access */ - fileaccess= GENERIC_WRITE; - break; - case _O_RDWR: /* read and write access */ - fileaccess= GENERIC_READ | GENERIC_WRITE; - break; - default: /* error, bad oflag */ - errno= EINVAL; - _doserrno= 0L; /* not an OS error */ - return -1; - } - - /* - * decode sharing flags - */ - switch (shflag) { - case _SH_DENYRW: /* exclusive access except delete */ - fileshare= FILE_SHARE_DELETE; - break; - case _SH_DENYWR: /* share read and delete access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_DELETE; - break; - case _SH_DENYRD: /* share write and delete access */ - fileshare= FILE_SHARE_WRITE | FILE_SHARE_DELETE; - break; - case _SH_DENYNO: /* share read, write and delete access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; - break; - case _SH_DENYRWD: /* exclusive access */ - fileshare= 0L; - break; - case _SH_DENYWRD: /* share read access */ - fileshare= FILE_SHARE_READ; - break; - case _SH_DENYRDD: /* share write access */ - fileshare= FILE_SHARE_WRITE; - break; - case _SH_DENYDEL: /* share read and write access */ - fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE; - break; - default: /* error, bad shflag */ - errno= EINVAL; - _doserrno= 0L; /* not an OS error */ - return -1; - } - - /* - * decode open/create method flags - */ - switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) { - case 0: - case _O_EXCL: /* ignore EXCL w/o CREAT */ - filecreate= OPEN_EXISTING; - break; - - case _O_CREAT: - filecreate= OPEN_ALWAYS; - break; - - case _O_CREAT | _O_EXCL: - case _O_CREAT | _O_TRUNC | _O_EXCL: - filecreate= CREATE_NEW; - break; - - case _O_TRUNC: - case _O_TRUNC | _O_EXCL: /* ignore EXCL w/o CREAT */ - filecreate= TRUNCATE_EXISTING; - break; - - case _O_CREAT | _O_TRUNC: - filecreate= CREATE_ALWAYS; - break; - - default: - /* this can't happen ... all cases are covered */ - errno= EINVAL; - _doserrno= 0L; - return -1; - } - - /* - * decode file attribute flags if _O_CREAT was specified - */ - fileattrib= FILE_ATTRIBUTE_NORMAL; /* default */ - if (oflag & _O_CREAT) - { - _umask((mask= _umask(0))); - - if (!((pmode & ~mask) & _S_IWRITE)) - fileattrib= FILE_ATTRIBUTE_READONLY; - } - - /* - * Set temporary file (delete-on-close) attribute if requested. - */ - if (oflag & _O_TEMPORARY) - { - fileattrib|= FILE_FLAG_DELETE_ON_CLOSE; - fileaccess|= DELETE; - } - - /* - * Set temporary file (delay-flush-to-disk) attribute if requested. - */ - if (oflag & _O_SHORT_LIVED) - fileattrib|= FILE_ATTRIBUTE_TEMPORARY; - - /* - * Set sequential or random access attribute if requested. - */ - if (oflag & _O_SEQUENTIAL) - fileattrib|= FILE_FLAG_SEQUENTIAL_SCAN; - else if (oflag & _O_RANDOM) - fileattrib|= FILE_FLAG_RANDOM_ACCESS; - - /* - * try to open/create the file - */ - if ((osfh= CreateFile(path, fileaccess, fileshare, &SecurityAttributes, - filecreate, fileattrib, NULL)) == INVALID_HANDLE_VALUE) - { - /* - * OS call to open/create file failed! map the error, release - * the lock, and return -1. note that it's not necessary to - * call _free_osfhnd (it hasn't been used yet). - */ - _dosmaperr(GetLastError()); /* map error */ - return -1; /* return error to caller */ - } - - if ((fh= _open_osfhandle((intptr_t)osfh, - oflag & (_O_APPEND | _O_RDONLY | _O_TEXT))) == -1) - { - _dosmaperr(GetLastError()); /* map error */ - CloseHandle(osfh); - } - - return fh; /* return handle */ -} -#endif /* __WIN__ */ #ifdef EXTRA_DEBUG diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 836f5a92963..6421a2da601 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -18,7 +18,7 @@ #include "my_base.h" #include #include -#ifdef HAVE_PREAD +#ifndef _WIN32 #include #endif @@ -48,9 +48,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, { size_t readbytes; int error= 0; -#ifndef HAVE_PREAD - int save_errno; -#endif + #ifndef DBUG_OFF char llbuf[22]; DBUG_ENTER("my_pread"); @@ -61,20 +59,15 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, for (;;) { errno= 0; /* Linux, Windows don't reset this on EOF/success */ -#ifndef HAVE_PREAD - pthread_mutex_lock(&my_file_info[Filedes].mutex); - readbytes= (uint) -1; - error= (lseek(Filedes, offset, MY_SEEK_SET) == (my_off_t) -1 || - (readbytes= read(Filedes, Buffer, (uint) Count)) != Count); - save_errno= errno; - pthread_mutex_unlock(&my_file_info[Filedes].mutex); +#ifdef _WIN32 + readbytes= my_win_pread(Filedes, Buffer, Count, offset); +#else + readbytes= pread(Filedes, Buffer, Count, offset); +#endif + error = (readbytes != Count); + if (error) { - errno= save_errno; -#else - if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) - { -#endif my_errno= errno ? errno : -1; if (errno == 0 || (readbytes != (size_t) -1 && (MyFlags & (MY_NABP | MY_FNABP)))) @@ -91,19 +84,19 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, #endif if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { - if (readbytes == (size_t) -1) - my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), - my_filename(Filedes),my_errno); - else if (MyFlags & (MY_NABP | MY_FNABP)) - my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), - my_filename(Filedes),my_errno); + if (readbytes == (size_t) -1) + my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), + my_filename(Filedes),my_errno); + else if (MyFlags & (MY_NABP | MY_FNABP)) + my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), + my_filename(Filedes),my_errno); } if (readbytes == (size_t) -1 || (MyFlags & (MY_FNABP | MY_NABP))) - DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ + DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ } if (MyFlags & (MY_NABP | MY_FNABP)) - DBUG_RETURN(0); /* Read went ok; Return 0 */ - DBUG_RETURN(readbytes); /* purecov: inspected */ + DBUG_RETURN(0); /* Read went ok; Return 0 */ + DBUG_RETURN(readbytes); /* purecov: inspected */ } } /* my_pread */ @@ -132,7 +125,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) { - size_t writenbytes, written; + size_t writtenbytes, written; uint errors; #ifndef DBUG_OFF char llbuf[22]; @@ -146,28 +139,22 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, for (;;) { -#ifndef HAVE_PREAD - int error; - writenbytes= (size_t) -1; - pthread_mutex_lock(&my_file_info[Filedes].mutex); - error= (lseek(Filedes, offset, MY_SEEK_SET) != (my_off_t) -1 && - (writenbytes = write(Filedes, Buffer, (uint) Count)) == Count); - pthread_mutex_unlock(&my_file_info[Filedes].mutex); - if (error) - break; +#ifdef _WIN32 + writtenbytes= my_win_pwrite(Filedes, Buffer, Count,offset); #else - if ((writenbytes= pwrite(Filedes, Buffer, Count,offset)) == Count) + writtenbytes= pwrite(Filedes, Buffer, Count, offset); +#endif + if (writtenbytes == Count) break; my_errno= errno; -#endif - if (writenbytes != (size_t) -1) + if (writtenbytes != (size_t) -1) { /* Safegueard */ - written+=writenbytes; - Buffer+=writenbytes; - Count-=writenbytes; - offset+=writenbytes; + written+=writtenbytes; + Buffer+=writtenbytes; + Count-=writtenbytes; + offset+=writtenbytes; } - DBUG_PRINT("error",("Write only %u bytes", (uint) writenbytes)); + DBUG_PRINT("error",("Write only %u bytes", (uint) writtenbytes)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) @@ -180,15 +167,15 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, errors++; continue; } - if ((writenbytes && writenbytes != (size_t) -1) || my_errno == EINTR) + if ((writtenbytes && writtenbytes != (size_t) -1) || my_errno == EINTR) continue; /* Retry */ #endif if (MyFlags & (MY_NABP | MY_FNABP)) { if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { - my_error(EE_WRITE, MYF(ME_BELL | ME_WAITTANG), - my_filename(Filedes),my_errno); + my_error(EE_WRITE, MYF(ME_BELL | ME_WAITTANG), + my_filename(Filedes),my_errno); } DBUG_RETURN(MY_FILE_ERROR); /* Error on read */ } @@ -198,5 +185,5 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, DBUG_EXECUTE_IF("check", my_seek(Filedes, -1, SEEK_SET, MYF(0));); if (MyFlags & (MY_NABP | MY_FNABP)) DBUG_RETURN(0); /* Want only errors */ - DBUG_RETURN(writenbytes+written); /* purecov: inspected */ + DBUG_RETURN(writtenbytes+written); /* purecov: inspected */ } /* my_pwrite */ diff --git a/mysys/my_quick.c b/mysys/my_quick.c index 0ba20a5bdee..b93e7e17224 100644 --- a/mysys/my_quick.c +++ b/mysys/my_quick.c @@ -19,11 +19,19 @@ #include "my_nosys.h" +#ifdef _WIN32 +extern size_t my_win_read(File Filedes,uchar *Buffer,size_t Count); +#endif + size_t my_quick_read(File Filedes,uchar *Buffer,size_t Count,myf MyFlags) { size_t readbytes; - - if ((readbytes = read(Filedes, Buffer, (uint) Count)) != Count) +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= read(Filedes, Buffer, Count); +#endif + if(readbytes != Count) { #ifndef DBUG_OFF if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) @@ -40,8 +48,13 @@ size_t my_quick_read(File Filedes,uchar *Buffer,size_t Count,myf MyFlags) } -size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) + +size_t my_quick_write(File Filedes, const uchar *Buffer, size_t Count) { +#ifdef _WIN32 + return my_win_write(Filedes, Buffer, Count); +#else + #ifndef DBUG_OFF size_t writtenbytes; #endif @@ -50,7 +63,7 @@ size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) #ifndef DBUG_OFF writtenbytes = #endif - (size_t) write(Filedes,Buffer, (uint) Count)) != Count) + (size_t) write(Filedes,Buffer,Count)) != Count) { #ifndef DBUG_OFF if ((writtenbytes == 0 || writtenbytes == (size_t) -1) && errno == EINTR) @@ -64,4 +77,5 @@ size_t my_quick_write(File Filedes,const uchar *Buffer,size_t Count) return (size_t) -1; } return 0; +#endif } diff --git a/mysys/my_read.c b/mysys/my_read.c index 25ffe73d813..9c76193aa63 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -44,7 +44,12 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) for (;;) { errno= 0; /* Linux, Windows don't reset this on EOF/success */ - if ((readbytes= read(Filedes, Buffer, (uint) Count)) != Count) +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= my_read(Fildes, Buffer, Count); +#endif + if (readbytes != Count) { my_errno= errno; if (errno == 0 || (readbytes != (size_t) -1 && diff --git a/mysys/my_seek.c b/mysys/my_seek.c index d186d56869a..24941517487 100644 --- a/mysys/my_seek.c +++ b/mysys/my_seek.c @@ -56,16 +56,11 @@ my_off_t my_seek(File fd, my_off_t pos, int whence, myf MyFlags) Make sure we are using a valid file descriptor! */ DBUG_ASSERT(fd != -1); -#if defined(THREAD) && !defined(HAVE_PREAD) - if (MyFlags & MY_THREADSAFE) - { - pthread_mutex_lock(&my_file_info[fd].mutex); - newpos= lseek(fd, pos, whence); - pthread_mutex_unlock(&my_file_info[fd].mutex); - } - else +#ifdef _WIN32 + newpos= my_win_lseek(fd, pos, whence); +#else + newpos= lseek(fd, pos, whence); #endif - newpos= lseek(fd, pos, whence); if (newpos == (os_off_t) -1) { my_errno= errno; @@ -91,7 +86,9 @@ my_off_t my_tell(File fd, myf MyFlags) DBUG_ENTER("my_tell"); DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags)); DBUG_ASSERT(fd >= 0); -#ifdef HAVE_TELL +#ifdef _WIN32 + pos= my_seek(fd, 0, MY_SEEK_CUR,0); +#elif defined(HAVE_TELL) pos=tell(fd); #else pos=lseek(fd, 0L, MY_SEEK_CUR); diff --git a/mysys/my_static.c b/mysys/my_static.c index 08653d03d21..629ee26efb9 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -35,7 +35,7 @@ int NEAR my_umask=0664, NEAR my_umask_dir=0777; #ifndef THREAD int NEAR my_errno=0; #endif -struct st_my_file_info my_file_info_default[MY_NFILE]= {{0,UNOPEN}}; +struct st_my_file_info my_file_info_default[MY_NFILE]; uint my_file_limit= MY_NFILE; struct st_my_file_info *my_file_info= my_file_info_default; diff --git a/mysys/my_sync.c b/mysys/my_sync.c index dae20b0163e..9c5fbce7ab7 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -71,7 +71,7 @@ int my_sync(File fd, myf my_flags) if (res == -1 && errno == ENOLCK) res= 0; /* Result Bug in Old FreeBSD */ #elif defined(__WIN__) - res= _commit(fd); + res= my_win_fsync(fd); #else #error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ diff --git a/mysys/my_winerr.c b/mysys/my_winerr.c new file mode 100644 index 00000000000..534078b6737 --- /dev/null +++ b/mysys/my_winerr.c @@ -0,0 +1,123 @@ +/* Copyright (C) 2008 MySQL AB + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Convert Windows API error (GetLastError() to Posix equivalent (errno) + The exported function my_osmaperr() is modelled after and borrows + heavily from undocumented _dosmaperr()(found of the static Microsoft C runtime). +*/ + +#include +#include + + +struct errentry +{ + unsigned long oscode; /* OS return value */ + int sysv_errno; /* System V error code */ +}; + +static struct errentry errtable[]= { + { ERROR_INVALID_FUNCTION, EINVAL }, /* 1 */ + { ERROR_FILE_NOT_FOUND, ENOENT }, /* 2 */ + { ERROR_PATH_NOT_FOUND, ENOENT }, /* 3 */ + { ERROR_TOO_MANY_OPEN_FILES, EMFILE }, /* 4 */ + { ERROR_ACCESS_DENIED, EACCES }, /* 5 */ + { ERROR_INVALID_HANDLE, EBADF }, /* 6 */ + { ERROR_ARENA_TRASHED, ENOMEM }, /* 7 */ + { ERROR_NOT_ENOUGH_MEMORY, ENOMEM }, /* 8 */ + { ERROR_INVALID_BLOCK, ENOMEM }, /* 9 */ + { ERROR_BAD_ENVIRONMENT, E2BIG }, /* 10 */ + { ERROR_BAD_FORMAT, ENOEXEC }, /* 11 */ + { ERROR_INVALID_ACCESS, EINVAL }, /* 12 */ + { ERROR_INVALID_DATA, EINVAL }, /* 13 */ + { ERROR_INVALID_DRIVE, ENOENT }, /* 15 */ + { ERROR_CURRENT_DIRECTORY, EACCES }, /* 16 */ + { ERROR_NOT_SAME_DEVICE, EXDEV }, /* 17 */ + { ERROR_NO_MORE_FILES, ENOENT }, /* 18 */ + { ERROR_LOCK_VIOLATION, EACCES }, /* 33 */ + { ERROR_BAD_NETPATH, ENOENT }, /* 53 */ + { ERROR_NETWORK_ACCESS_DENIED, EACCES }, /* 65 */ + { ERROR_BAD_NET_NAME, ENOENT }, /* 67 */ + { ERROR_FILE_EXISTS, EEXIST }, /* 80 */ + { ERROR_CANNOT_MAKE, EACCES }, /* 82 */ + { ERROR_FAIL_I24, EACCES }, /* 83 */ + { ERROR_INVALID_PARAMETER, EINVAL }, /* 87 */ + { ERROR_NO_PROC_SLOTS, EAGAIN }, /* 89 */ + { ERROR_DRIVE_LOCKED, EACCES }, /* 108 */ + { ERROR_BROKEN_PIPE, EPIPE }, /* 109 */ + { ERROR_DISK_FULL, ENOSPC }, /* 112 */ + { ERROR_INVALID_TARGET_HANDLE, EBADF }, /* 114 */ + { ERROR_INVALID_HANDLE, EINVAL }, /* 124 */ + { ERROR_WAIT_NO_CHILDREN, ECHILD }, /* 128 */ + { ERROR_CHILD_NOT_COMPLETE, ECHILD }, /* 129 */ + { ERROR_DIRECT_ACCESS_HANDLE, EBADF }, /* 130 */ + { ERROR_NEGATIVE_SEEK, EINVAL }, /* 131 */ + { ERROR_SEEK_ON_DEVICE, EACCES }, /* 132 */ + { ERROR_DIR_NOT_EMPTY, ENOTEMPTY }, /* 145 */ + { ERROR_NOT_LOCKED, EACCES }, /* 158 */ + { ERROR_BAD_PATHNAME, ENOENT }, /* 161 */ + { ERROR_MAX_THRDS_REACHED, EAGAIN }, /* 164 */ + { ERROR_LOCK_FAILED, EACCES }, /* 167 */ + { ERROR_ALREADY_EXISTS, EEXIST }, /* 183 */ + { ERROR_FILENAME_EXCED_RANGE, ENOENT }, /* 206 */ + { ERROR_NESTING_NOT_ALLOWED, EAGAIN }, /* 215 */ + { ERROR_NOT_ENOUGH_QUOTA, ENOMEM } /* 1816 */ +}; + +/* size of the table */ +#define ERRTABLESIZE (sizeof(errtable)/sizeof(errtable[0])) + +/* The following two constants must be the minimum and maximum +values in the (contiguous) range of Exec Failure errors. */ +#define MIN_EXEC_ERROR ERROR_INVALID_STARTING_CODESEG +#define MAX_EXEC_ERROR ERROR_INFLOOP_IN_RELOC_CHAIN + +/* These are the low and high value in the range of errors that are +access violations */ +#define MIN_EACCES_RANGE ERROR_WRITE_PROTECT +#define MAX_EACCES_RANGE ERROR_SHARING_BUFFER_EXCEEDED + + +static int get_errno_from_oserr(unsigned long oserrno) +{ + int i; + + /* check the table for the OS error code */ + for (i= 0; i < ERRTABLESIZE; ++i) + { + if (oserrno == errtable[i].oscode) + { + return errtable[i].sysv_errno; + } + } + + /* The error code wasn't in the table. We check for a range of */ + /* EACCES errors or exec failure errors (ENOEXEC). Otherwise */ + /* EINVAL is returned. */ + + if (oserrno >= MIN_EACCES_RANGE && oserrno <= MAX_EACCES_RANGE) + return EACCES; + else if (oserrno >= MIN_EXEC_ERROR && oserrno <= MAX_EXEC_ERROR) + return ENOEXEC; + else + return EINVAL; +} + +/* Set errno corresponsing to GetLastError() value */ +void my_osmaperr ( unsigned long oserrno) +{ + errno= get_errno_from_oserr(oserrno); +} diff --git a/mysys/my_winfile.c b/mysys/my_winfile.c new file mode 100644 index 00000000000..f63c35ba47b --- /dev/null +++ b/mysys/my_winfile.c @@ -0,0 +1,681 @@ +/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + The purpose of this file is to provide implementation of file IO routines on + Windows that can be thought as drop-in replacement for corresponding C runtime + functionality. + + Compared to Windows CRT, this one + - does not have the same file descriptor + limitation (default is 16384 and can be increased further, whereas CRT poses + a hard limit of 2048 file descriptors) + - the file operations are not serialized + - positional IO pread/pwrite is ported here. + - no text mode for files, all IO is "binary" + + Naming convention: + All routines are prefixed with my_win_, e.g Posix open() is implemented with + my_win_open() + + Implemented are + - POSIX routines(e.g open, read, lseek ...) + - Some ANSI C stream routines (fopen, fdopen, fileno, fclose) + - Windows CRT equvalients (my_get_osfhandle, open_osfhandle) + + Worth to note: + - File descriptors used here are located in a range that is not compatible + with CRT on purpose. Attempt to use a file descriptor from Windows CRT library + range in my_win_* function will be punished with DBUG_ASSERT() + + - File streams (FILE *) are actually from the C runtime. The routines provided + here are useful only in scernarios that use low-level IO with my_win_fileno() +*/ + +#ifdef _WIN32 + +#include "mysys_priv.h" +#include +#include + +/* Associates a file descriptor with an existing operating-system file handle.*/ +File my_open_osfhandle(HANDLE handle, int oflag) +{ + int offset= -1; + uint i; + DBUG_ENTER("my_open_osfhandle"); + + pthread_mutex_lock(&THR_LOCK_open); + for(i= MY_FILE_MIN; i < my_file_limit;i++) + { + if(my_file_info[i].fhandle == 0) + { + struct st_my_file_info *finfo= &(my_file_info[i]); + finfo->type= FILE_BY_OPEN; + finfo->fhandle= handle; + finfo->oflag= oflag; + offset= i; + break; + } + } + pthread_mutex_unlock(&THR_LOCK_open); + if(offset == -1) + errno= EMFILE; /* to many file handles open */ + DBUG_RETURN(offset); +} + + +static void invalidate_fd(File fd) +{ + DBUG_ENTER("invalidate_fd"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + my_file_info[fd].fhandle= 0; + DBUG_VOID_RETURN; +} + + +/* Get Windows handle for a file descriptor */ +HANDLE my_get_osfhandle(File fd) +{ + DBUG_ENTER("my_get_osfhandle"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].fhandle); +} + + +static int my_get_open_flags(File fd) +{ + DBUG_ENTER("my_get_open_flags"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].oflag); +} + + +/* + Open a file with sharing. Similar to _sopen() from libc, but allows managing + share delete on win32 + + SYNOPSIS + my_win_sopen() + path file name + oflag operation flags + shflag share flag + pmode permission flags + + RETURN VALUE + File descriptor of opened file if success + -1 and sets errno if fails. +*/ + +File my_win_sopen(const char *path, int oflag, int shflag, int pmode) +{ + int fh; /* handle of opened file */ + int mask; + HANDLE osfh; /* OS handle of opened file */ + DWORD fileaccess; /* OS file access (requested) */ + DWORD fileshare; /* OS file sharing mode */ + DWORD filecreate; /* OS method of opening/creating */ + DWORD fileattrib; /* OS file attribute flags */ + SECURITY_ATTRIBUTES SecurityAttributes; + + DBUG_ENTER("my_win_sopen"); + + if (check_if_legal_filename(path)) + { + errno= EACCES; + DBUG_RETURN(-1); + } + SecurityAttributes.nLength= sizeof(SecurityAttributes); + SecurityAttributes.lpSecurityDescriptor= NULL; + SecurityAttributes.bInheritHandle= !(oflag & _O_NOINHERIT); + + /* decode the access flags */ + switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) { + case _O_RDONLY: /* read access */ + fileaccess= GENERIC_READ; + break; + case _O_WRONLY: /* write access */ + fileaccess= GENERIC_WRITE; + break; + case _O_RDWR: /* read and write access */ + fileaccess= GENERIC_READ | GENERIC_WRITE; + break; + default: /* error, bad oflag */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode sharing flags */ + switch (shflag) { + case _SH_DENYRW: /* exclusive access except delete */ + fileshare= FILE_SHARE_DELETE; + break; + case _SH_DENYWR: /* share read and delete access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_DELETE; + break; + case _SH_DENYRD: /* share write and delete access */ + fileshare= FILE_SHARE_WRITE | FILE_SHARE_DELETE; + break; + case _SH_DENYNO: /* share read, write and delete access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; + break; + case _SH_DENYRWD: /* exclusive access */ + fileshare= 0L; + break; + case _SH_DENYWRD: /* share read access */ + fileshare= FILE_SHARE_READ; + break; + case _SH_DENYRDD: /* share write access */ + fileshare= FILE_SHARE_WRITE; + break; + case _SH_DENYDEL: /* share read and write access */ + fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE; + break; + default: /* error, bad shflag */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode open/create method flags */ + switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) { + case 0: + case _O_EXCL: /* ignore EXCL w/o CREAT */ + filecreate= OPEN_EXISTING; + break; + + case _O_CREAT: + filecreate= OPEN_ALWAYS; + break; + + case _O_CREAT | _O_EXCL: + case _O_CREAT | _O_TRUNC | _O_EXCL: + filecreate= CREATE_NEW; + break; + + case _O_TRUNC: + case _O_TRUNC | _O_EXCL: /* ignore EXCL w/o CREAT */ + filecreate= TRUNCATE_EXISTING; + break; + + case _O_CREAT | _O_TRUNC: + filecreate= CREATE_ALWAYS; + break; + + default: + /* this can't happen ... all cases are covered */ + errno= EINVAL; + DBUG_RETURN(-1); + } + + /* decode file attribute flags if _O_CREAT was specified */ + fileattrib= FILE_ATTRIBUTE_NORMAL; /* default */ + if (oflag & _O_CREAT) + { + _umask((mask= _umask(0))); + + if (!((pmode & ~mask) & _S_IWRITE)) + fileattrib= FILE_ATTRIBUTE_READONLY; + } + + /* Set temporary file (delete-on-close) attribute if requested. */ + if (oflag & _O_TEMPORARY) + { + fileattrib|= FILE_FLAG_DELETE_ON_CLOSE; + fileaccess|= DELETE; + } + + /* Set temporary file (delay-flush-to-disk) attribute if requested.*/ + if (oflag & _O_SHORT_LIVED) + fileattrib|= FILE_ATTRIBUTE_TEMPORARY; + + /* Set sequential or random access attribute if requested. */ + if (oflag & _O_SEQUENTIAL) + fileattrib|= FILE_FLAG_SEQUENTIAL_SCAN; + else if (oflag & _O_RANDOM) + fileattrib|= FILE_FLAG_RANDOM_ACCESS; + + /* try to open/create the file */ + if ((osfh= CreateFile(path, fileaccess, fileshare, &SecurityAttributes, + filecreate, fileattrib, NULL)) == INVALID_HANDLE_VALUE) + { + /* + OS call to open/create file failed! map the error, release + the lock, and return -1. note that it's not necessary to + call _free_osfhnd (it hasn't been used yet). + */ + my_osmaperr(GetLastError()); /* map error */ + DBUG_RETURN(-1); /* return error to caller */ + } + + if ((fh= my_open_osfhandle(osfh, + oflag & (_O_APPEND | _O_RDONLY | _O_TEXT))) == -1) + { + CloseHandle(osfh); + } + + DBUG_RETURN(fh); /* return handle */ +} + + +File my_win_open(const char *path, int flags) +{ + DBUG_ENTER("my_win_open"); + DBUG_RETURN(my_win_sopen((char *) path, flags | _O_BINARY, _SH_DENYNO, + _S_IREAD | S_IWRITE)); +} + + +int my_win_close(File fd) +{ + DBUG_ENTER("my_win_close"); + if(CloseHandle(my_get_osfhandle(fd))) + { + invalidate_fd(fd); + DBUG_RETURN(0); + } + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + + +size_t my_win_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset) +{ + DWORD nBytesRead; + HANDLE hFile; + OVERLAPPED ov= {0}; + LARGE_INTEGER li; + + DBUG_ENTER("my_win_pread"); + + if(!Count) + DBUG_RETURN(0); +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + li.QuadPart= offset; + ov.Offset= li.LowPart; + ov.OffsetHigh= li.HighPart; + + if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, &ov)) + { + DWORD lastError= GetLastError(); + /* + ERROR_BROKEN_PIPE is returned when no more data coming + through e.g. a command pipe in windows : see MSDN on ReadFile. + */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_read(File Filedes, uchar *Buffer, size_t Count) +{ + DWORD nBytesRead; + HANDLE hFile; + + DBUG_ENTER("my_win_read"); + if(!Count) + DBUG_RETURN(0); +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + + if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, NULL)) + { + DWORD lastError= GetLastError(); + /* + ERROR_BROKEN_PIPE is returned when no more data coming + through e.g. a command pipe in windows : see MSDN on ReadFile. + */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_pwrite(File Filedes, const uchar *Buffer, size_t Count, + my_off_t offset) +{ + DWORD nBytesWritten; + HANDLE hFile; + OVERLAPPED ov= {0}; + LARGE_INTEGER li; + + DBUG_ENTER("my_win_pwrite"); + DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count: %llu, offset: %llu", + Filedes, Buffer, (ulonglong)Count, (ulonglong)offset)); + + if(!Count) + DBUG_RETURN(0); + +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + li.QuadPart= offset; + ov.Offset= li.LowPart; + ov.OffsetHigh= li.HighPart; + + if(!WriteFile(hFile, Buffer, (DWORD)Count, &nBytesWritten, &ov)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN((size_t)-1); + } + else + DBUG_RETURN(nBytesWritten); +} + + +my_off_t my_win_lseek(File fd, my_off_t pos, int whence) +{ + LARGE_INTEGER offset; + LARGE_INTEGER newpos; + + DBUG_ENTER("my_win_lseek"); + + /* Check compatibility of Windows and Posix seek constants */ + compile_time_assert(FILE_BEGIN == SEEK_SET && FILE_CURRENT == SEEK_CUR + && FILE_END == SEEK_END); + + offset.QuadPart= pos; + if(!SetFilePointerEx(my_get_osfhandle(fd), offset, &newpos, whence)) + { + my_osmaperr(GetLastError()); + newpos.QuadPart= -1; + } + DBUG_RETURN(newpos.QuadPart); +} + + +#ifndef FILE_WRITE_TO_END_OF_FILE +#define FILE_WRITE_TO_END_OF_FILE 0xffffffff +#endif +size_t my_win_write(File fd, const uchar *Buffer, size_t Count) +{ + DWORD nWritten; + OVERLAPPED ov; + OVERLAPPED *pov= NULL; + HANDLE hFile; + + DBUG_ENTER("my_win_write"); + DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count %llu", fd, Buffer, + (ulonglong)Count)); + + if(!Count) + DBUG_RETURN(0); + +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + if(my_get_open_flags(fd) & _O_APPEND) + { + /* + Atomic append to the end of file is is done by special initialization of + the OVERLAPPED structure. See MSDN WriteFile documentation for more info. + */ + memset(&ov, 0, sizeof(ov)); + ov.Offset= FILE_WRITE_TO_END_OF_FILE; + ov.OffsetHigh= -1; + pov= &ov; + } + + hFile= my_get_osfhandle(fd); + if(!WriteFile(hFile, Buffer, (DWORD)Count, &nWritten, pov)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nWritten); +} + + +int my_win_chsize(File fd, my_off_t newlength) +{ + HANDLE hFile; + LARGE_INTEGER length; + DBUG_ENTER("my_win_chsize"); + + hFile= (HANDLE) my_get_osfhandle(fd); + length.QuadPart= newlength; + if (!SetFilePointerEx(hFile, length , NULL , FILE_BEGIN)) + goto err; + if (!SetEndOfFile(hFile)) + goto err; + DBUG_RETURN(0); +err: + my_osmaperr(GetLastError()); + my_errno= errno; + DBUG_RETURN(-1); +} + + +/* Get the file descriptor for stdin,stdout or stderr */ +static File my_get_stdfile_descriptor(FILE *stream) +{ + HANDLE hFile; + DWORD nStdHandle; + DBUG_ENTER("my_get_stdfile_descriptor"); + + if(stream == stdin) + nStdHandle= STD_INPUT_HANDLE; + else if(stream == stdout) + nStdHandle= STD_OUTPUT_HANDLE; + else if(stream == stderr) + nStdHandle= STD_ERROR_HANDLE; + else + DBUG_RETURN(-1); + + hFile= GetStdHandle(nStdHandle); + if(hFile != INVALID_HANDLE_VALUE) + DBUG_RETURN(my_open_osfhandle(hFile, 0)); + DBUG_RETURN(-1); +} + + +File my_win_fileno(FILE *file) +{ + HANDLE hFile= (HANDLE)_get_osfhandle(fileno(file)); + int retval= -1; + uint i; + + DBUG_ENTER("my_win_fileno"); + + for(i= MY_FILE_MIN; i < my_file_limit; i++) + { + if(my_file_info[i].fhandle == hFile) + { + retval= i; + break; + } + } + if(retval == -1) + /* try std stream */ + DBUG_RETURN(my_get_stdfile_descriptor(file)); + DBUG_RETURN(retval); +} + + +FILE *my_win_fopen(const char *filename, const char *type) +{ + FILE *file; + int flags= 0; + DBUG_ENTER("my_win_open"); + + /* + If we are not creating, then we need to use my_access to make sure + the file exists since Windows doesn't handle files like "com1.sym" + very well + */ + if (check_if_legal_filename(filename)) + { + errno= EACCES; + DBUG_RETURN(NULL); + } + + file= fopen(filename, type); + if(!file) + DBUG_RETURN(NULL); + + if(strchr(type,'a') != NULL) + flags= O_APPEND; + + /* + Register file handle in my_table_info. + Necessary for my_fileno() + */ + if(my_open_osfhandle((HANDLE)_get_osfhandle(fileno(file)), flags) < 0) + { + fclose(file); + DBUG_RETURN(NULL); + } + DBUG_RETURN(file); +} + + +FILE * my_win_fdopen(File fd, const char *type) +{ + FILE *file; + int crt_fd; + int flags= 0; + + DBUG_ENTER("my_win_fdopen"); + + if(strchr(type,'a') != NULL) + flags= O_APPEND; + /* Convert OS file handle to CRT file descriptor and then call fdopen*/ + crt_fd= _open_osfhandle((intptr_t)my_get_osfhandle(fd), flags); + if(crt_fd < 0) + file= NULL; + else + file= fdopen(crt_fd, type); + DBUG_RETURN(file); +} + + +int my_win_fclose(FILE *file) +{ + File fd; + + DBUG_ENTER("my_win_close"); + fd= my_fileno(file); + if(fd < 0) + DBUG_RETURN(-1); + if(fclose(file) < 0) + DBUG_RETURN(-1); + invalidate_fd(fd); + DBUG_RETURN(0); +} + + + +/* + Quick and dirty my_fstat() implementation for Windows. + Use CRT fstat on temporarily allocated file descriptor. + Patch file size, because size that fstat returns is not + reliable (may be outdated) +*/ +int my_win_fstat(File fd, struct _stati64 *buf) +{ + int crt_fd; + int retval; + HANDLE hFile, hDup; + + DBUG_ENTER("my_win_fstat"); + + hFile= my_get_osfhandle(fd); + if(!DuplicateHandle( GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup ,0,FALSE,DUPLICATE_SAME_ACCESS)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); + } + if ((crt_fd= _open_osfhandle((intptr_t)hDup,0)) < 0) + DBUG_RETURN(-1); + + retval= _fstati64(crt_fd, buf); + if(retval == 0) + { + /* File size returned by stat is not accurate (may be outdated), fix it*/ + GetFileSizeEx(hDup, (PLARGE_INTEGER) (&(buf->st_size))); + } + _close(crt_fd); + DBUG_RETURN(retval); +} + + + +int my_win_stat( const char *path, struct _stati64 *buf) +{ + DBUG_ENTER("my_win_stat"); + if(_stati64( path, buf) == 0) + { + /* File size returned by stat is not accurate (may be outdated), fix it*/ + WIN32_FILE_ATTRIBUTE_DATA data; + if (GetFileAttributesEx(path, GetFileExInfoStandard, &data)) + { + LARGE_INTEGER li; + li.LowPart= data.nFileSizeLow; + li.HighPart= data.nFileSizeHigh; + buf->st_size= li.QuadPart; + } + DBUG_RETURN(0); + } + DBUG_RETURN(-1); +} + + + +int my_win_fsync(File fd) +{ + DBUG_ENTER("my_win_fsync"); + if(FlushFileBuffers(my_get_osfhandle(fd))) + DBUG_RETURN(0); + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + + + +int my_win_dup(File fd) +{ + HANDLE hDup; + DBUG_ENTER("my_win_dup"); + if (DuplicateHandle(GetCurrentProcess(), my_get_osfhandle(fd), + GetCurrentProcess(), &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + DBUG_RETURN(my_open_osfhandle(hDup, my_get_open_flags(fd))); + } + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + +#endif /*_WIN32*/ diff --git a/mysys/my_write.c b/mysys/my_write.c index 52127545888..5b916d6cb65 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -22,7 +22,7 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) { - size_t writenbytes, written; + size_t writtenbytes, written; uint errors; DBUG_ENTER("my_write"); DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", @@ -35,17 +35,27 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) for (;;) { - if ((writenbytes= write(Filedes, Buffer, Count)) == Count) - break; - if (writenbytes != (size_t) -1) - { /* Safeguard */ - written+=writenbytes; - Buffer+=writenbytes; - Count-=writenbytes; +#ifdef _WIN32 + if(Filedes < 0) + { + errno = EINVAL; + return -1; } - my_errno=errno; + writtenbytes= my_win_write(Filedes, Buffer, Count); +#else + writtenbytes= write(Filedes, Buffer, Count); +#endif + if (writtenbytes == Count) + break; + if (writtenbytes != (size_t) -1) + { /* Safeguard */ + written+= writtenbytes; + Buffer+= writtenbytes; + Count-= writtenbytes; + } + my_errno= errno; DBUG_PRINT("error",("Write only %ld bytes, error: %d", - (long) writenbytes, my_errno)); + (long) writtenbytes, my_errno)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) @@ -59,19 +69,19 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) continue; } - if ((writenbytes == 0 || writenbytes == (size_t) -1)) + if ((writtenbytes == 0 || writtenbytes == (size_t) -1)) { if (my_errno == EINTR) { DBUG_PRINT("debug", ("my_write() was interrupted and returned %ld", - (long) writenbytes)); + (long) writtenbytes)); continue; /* Interrupted */ } - if (!writenbytes && !errors++) /* Retry once */ + if (!writtenbytes && !errors++) /* Retry once */ { /* We may come here if the file quota is exeeded */ - errno=EFBIG; /* Assume this is the error */ + errno= EFBIG; /* Assume this is the error */ continue; } } @@ -92,5 +102,5 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) } if (MyFlags & (MY_NABP | MY_FNABP)) DBUG_RETURN(0); /* Want only errors */ - DBUG_RETURN(writenbytes+written); + DBUG_RETURN(writtenbytes+written); } /* my_write */ diff --git a/mysys/mysys_priv.h b/mysys/mysys_priv.h index 113b64005f2..6d39999aa86 100644 --- a/mysys/mysys_priv.h +++ b/mysys/mysys_priv.h @@ -45,3 +45,27 @@ extern pthread_mutex_t THR_LOCK_charset, THR_LOCK_time; void my_error_unregister_all(void); void my_thread_destroy_mutex(void); my_bool my_wait_for_other_threads_to_die(uint number_of_threads); + +#ifdef _WIN32 +/* my_winfile.c exports, should not be used outside mysys */ +extern File my_win_open(const char *path, int oflag); +extern int my_win_close(File fd); +extern size_t my_win_read(File fd, uchar *buffer, size_t count); +extern size_t my_win_write(File fd, const uchar *buffer, size_t count); +extern size_t my_win_pread(File fd, uchar *buffer, size_t count, + my_off_t offset); +extern size_t my_win_pwrite(File fd, const uchar *buffer, size_t count, + my_off_t offset); +extern my_off_t my_win_lseek(File fd, my_off_t pos, int whence); +extern int my_win_chsize(File fd, my_off_t newlength); +extern FILE* my_win_fopen(const char *filename, const char *type); +extern File my_win_fclose(FILE *file); +extern File my_win_fileno(FILE *file); +extern FILE* my_win_fdopen(File Filedes, const char *type); +extern int my_win_stat(const char *path, struct _stat64 *buf); +extern int my_win_fstat(File fd, struct _stat64 *buf); +extern int my_win_fsync(File fd); +extern File my_win_dup(File fd); +extern File my_win_sopen(const char *path, int oflag, int shflag, int perm); +extern File my_open_osfhandle(HANDLE handle, int oflag); +#endif diff --git a/sql/discover.cc b/sql/discover.cc index 56dc00cc5c4..92af5d56016 100644 --- a/sql/discover.cc +++ b/sql/discover.cc @@ -67,7 +67,7 @@ int readfrm(const char *name, uchar **frmdata, size_t *len) error= 2; if (my_fstat(file, &state, MYF(0))) goto err; - read_len= state.st_size; + read_len= (size_t)state.st_size; // Read whole frm file error= 3; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index d6ac3a341a4..46182a96020 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -3025,16 +3025,16 @@ String *Item_load_file::val_str(String *str) func_name(), current_thd->variables.max_allowed_packet); goto err; } - if (tmp_value.alloc(stat_info.st_size)) + if (tmp_value.alloc((size_t)stat_info.st_size)) goto err; if ((file = my_open(file_name->ptr(), O_RDONLY, MYF(0))) < 0) goto err; - if (my_read(file, (uchar*) tmp_value.ptr(), stat_info.st_size, MYF(MY_NABP))) + if (my_read(file, (uchar*) tmp_value.ptr(), (size_t)stat_info.st_size, MYF(MY_NABP))) { my_close(file, MYF(0)); goto err; } - tmp_value.length(stat_info.st_size); + tmp_value.length((uint32)stat_info.st_size); my_close(file, MYF(0)); null_value = 0; DBUG_RETURN(&tmp_value); diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index d70de0dd13c..7ee411f94dc 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -684,11 +684,11 @@ int ha_archive::create(const char *name, TABLE *table_arg, { if (!my_fstat(frm_file, &file_stat, MYF(MY_WME))) { - frm_ptr= (uchar *)my_malloc(sizeof(uchar) * file_stat.st_size, MYF(0)); + frm_ptr= (uchar *)my_malloc(sizeof(uchar) * (size_t)file_stat.st_size, MYF(0)); if (frm_ptr) { - my_read(frm_file, frm_ptr, file_stat.st_size, MYF(0)); - azwrite_frm(&create_stream, (char *)frm_ptr, file_stat.st_size); + my_read(frm_file, frm_ptr, (size_t)file_stat.st_size, MYF(0)); + azwrite_frm(&create_stream, (char *)frm_ptr, (size_t)file_stat.st_size); my_free((uchar*)frm_ptr, MYF(0)); } } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6f58fd70fbd..d5c92ff7c22 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1061,7 +1061,29 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ + +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..6ccdd9e3574 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -48,7 +48,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include - +#ifdef _WIN32 +#include +#endif /** @file ha_innodb.cc */ /* Include necessary InnoDB headers */ @@ -1172,7 +1174,28 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index d986525814e..28b7a93164d 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -415,9 +415,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (!share->temporary) error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_KEEP, FLUSH_KEEP); -#ifdef HAVE_PREAD + _ma_decrement_open_count(info, 1); -#endif if (share->not_flushed) { share->not_flushed= 0; diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c index 97011831af8..17c1fc83f13 100644 --- a/storage/myisam/mi_locking.c +++ b/storage/myisam/mi_locking.c @@ -239,7 +239,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) break; /* Impossible */ } } -#ifdef __WIN__ +#ifdef _WIN32 else { /* @@ -521,11 +521,11 @@ int _mi_writeinfo(register MI_INFO *info, uint operation) share->state.update_count= info->last_loop= ++info->this_loop; if ((error=mi_state_info_write(share->kfile, &share->state, 1))) olderror=my_errno; -#ifdef __WIN__ +#ifdef _WIN32 if (myisam_flush) { - _commit(share->kfile); - _commit(info->dfile); + my_sync(share->kfile,0); + my_sync(info->dfile,0); } #endif } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index dfe98f09e4f..6d233a4219d 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -55,7 +55,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include #include - +#ifdef _WIN32 +#include +#endif /** @file ha_innodb.cc */ /* Include necessary InnoDB headers */ @@ -1219,7 +1221,28 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; From 4bca1a786f9986cec7d0487059451e51e2b9479b Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:07:18 +0200 Subject: [PATCH 014/359] Fix XtraDB LPBug #714143 : Windows native async io is disabled. The patch uses completion ports for asynchronous IO notification , instead of formerly used notification via event . This also removes the limit of 64 async IOs per background IO thread (this limit was forced by using WaitForMultipleObjects in previous AIO implementation) --- CMakeLists.txt | 2 +- storage/innodb_plugin/handler/ha_innodb.cc | 4 +- storage/xtradb/include/os0file.h | 4 +- storage/xtradb/os/os0file.c | 451 ++++++++------------- storage/xtradb/srv/srv0start.c | 13 +- 5 files changed, 181 insertions(+), 293 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6a65505fe..9d4df51bcb1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ IF (MSVC_VERSION GREATER 1400) ENDIF() -SET(CMAKE_INSTALL_PREFIX "C:/MariaDB${MYSQL_BASE_VERSION}") +SET(CMAKE_INSTALL_PREFIX "C:/MariaDB${MYSQL_BASE_VERSION}" CACHE PATH "Default installation directory") SET(INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") # Set standard options ADD_DEFINITIONS(-DHAVE_YASSL) diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..6ea751da332 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -10916,12 +10916,12 @@ static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of background read I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); + NULL, NULL, IF_WIN(1,4), 1, 64, 0); static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of background write I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); + NULL, NULL, IF_WIN(1,4), 1, 64, 0); static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 732e930517b..4a99eb6b0b5 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -152,8 +152,8 @@ log. */ #define OS_FILE_LOG 256 /* This can be ORed to type */ /* @} */ -#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more - than 64 */ +#define OS_AIO_N_PENDING_IOS_PER_THREAD 256 /*!< Windows might be able to handle +more */ /** Modes for aio operations @{ */ #define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 5b8e656d8b2..9817edd7f0d 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -121,6 +121,12 @@ typedef struct os_aio_slot_struct os_aio_slot_t; /** The asynchronous i/o array slot structure */ struct os_aio_slot_struct{ +#ifdef WIN_ASYNC_IO + OVERLAPPED control; /*!< Windows control block for the + aio request, MUST be first element in the structure*/ + void *arr; /*!< Array this slot belongs to*/ +#endif + ibool is_read; /*!< TRUE if a read operation */ ulint pos; /*!< index of the slot in the aio array */ @@ -148,12 +154,6 @@ struct os_aio_slot_struct{ and which can be used to identify which pending aio operation was completed */ -#ifdef WIN_ASYNC_IO - os_event_t event; /*!< event object we need in the - OVERLAPPED struct */ - OVERLAPPED control; /*!< Windows control block for the - aio request */ -#endif }; /** The asynchronous i/o array structure */ @@ -182,15 +182,6 @@ struct os_aio_array_struct{ /*!< Number of reserved slots in the aio array outside the ibuf segment */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ -#ifdef __WIN__ - os_native_event_t* native_events; - /*!< Pointer to an array of OS native - event handles where we copied the - handles from slots, in the same - order. This can be used in - WaitForMultipleObjects; used only in - Windows */ -#endif }; /** Array of events used in simulated aio */ @@ -250,6 +241,14 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; + +#ifdef _WIN32 +/** IO completion port used by background io threads */ +static HANDLE completion_port; +/** Thread local storage index for the per-thread event used for synchronous IO */ +static DWORD tls_sync_io = TLS_OUT_OF_INDEXES; +#endif + /***********************************************************************//** Gets the operating system version. Currently works only on Windows. @return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */ @@ -286,6 +285,86 @@ os_get_os_version(void) #endif } + +#ifdef _WIN32 +/* +Windows : Handling synchronous IO on files opened asynchronously. + +If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to +a completion port, then every IO on this file would normally be enqueued to the +completion port. Sometimes however we would like to do a synchronous IO. This is +possible if we initialitze have overlapped.hEvent with a valid event and set its +lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info) + +We'll create this special event once for each thread and store in thread local +storage. +*/ + + +/***********************************************************************//** +Initialize tls index.for event handle used for synchronized IO on files that +might be opened with FILE_FLAG_OVERLAPPED. +*/ +static void win_init_syncio_event() +{ + tls_sync_io = TlsAlloc(); + ut_a(tls_sync_io != TLS_OUT_OF_INDEXES); +} + +/***********************************************************************//** +Retrieve per-thread event for doing synchronous io on asyncronously opened files +*/ +static HANDLE win_get_syncio_event() +{ + HANDLE h; + if(tls_sync_io == TLS_OUT_OF_INDEXES){ + win_init_syncio_event(); + } + + h = (HANDLE)TlsGetValue(tls_sync_io); + if (h) + return h; + h = CreateEventA(NULL, FALSE, FALSE, NULL); + ut_a(h); + h = (HANDLE)((uintptr_t)h | 1); + TlsSetValue(tls_sync_io, h); + return h; +} + +/* + TLS destructor, inspired by Chromium code + http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc +*/ + +static void win_free_syncio_event() +{ + HANDLE h = win_get_syncio_event(); + if (h) { + CloseHandle(h); + } +} + +static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) { + if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason) + win_free_syncio_event(); +} + +#ifdef _WIN64 +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_base") +#pragma const_seg(".CRT$XLB") +extern const PIMAGE_TLS_CALLBACK p_thread_callback_base; +const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; +#pragma data_seg() +#else +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_p_thread_callback_base") +#pragma data_seg(".CRT$XLB") +PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; +#pragma data_seg() +#endif +#endif /*_WIN32 */ + /***********************************************************************//** Retrieves the last error number if an error occurs in a file io function. The number should be retrieved before any other OS calls (because they may @@ -611,6 +690,9 @@ os_io_init_simple(void) for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { os_file_seek_mutexes[i] = os_mutex_create(NULL); } +#ifdef _WIN32 + win_init_syncio_event(); +#endif } /***********************************************************************//** @@ -1325,6 +1407,8 @@ try_again: #endif #ifdef UNIV_NON_BUFFERED_IO # ifndef UNIV_HOTBACKUP + if (type == OS_LOG_FILE) + attributes = attributes | FILE_FLAG_SEQUENTIAL_SCAN; if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { /* Do not use unbuffered i/o to log files because value 2 denotes that we do not flush the log at every @@ -1402,6 +1486,9 @@ try_again: } } else { *success = TRUE; + if (os_aio_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) { + ut_a(CreateIoCompletionPort(file, completion_port, 0, 0)); + } } return(file); @@ -2350,13 +2437,10 @@ _os_file_read( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2378,33 +2462,18 @@ try_again: os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; + ret = ReadFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2433,9 +2502,6 @@ try_again: (ulong)n, (ulong)offset_high, (ulong)offset, (long)ret); #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error(NULL, "read"); if (retry) { @@ -2477,13 +2543,10 @@ os_file_read_no_error_handling( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ibool retry; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2505,33 +2568,19 @@ try_again: os_n_pending_reads++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; + ret = ReadFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; @@ -2554,9 +2603,6 @@ try_again: return(TRUE); } #endif /* __WIN__ */ -#ifdef __WIN__ -error_handling: -#endif retry = os_file_handle_error_no_exit(NULL, "read"); if (retry) { @@ -2609,14 +2655,11 @@ os_file_write( #ifdef __WIN__ BOOL ret; DWORD len; - DWORD ret2; DWORD low; DWORD high; ulint n_retries = 0; ulint err; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ + OVERLAPPED overlapped; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2636,56 +2679,18 @@ retry: os_n_pending_writes++; os_mutex_exit(os_file_count_mutex); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + memset (&overlapped, 0, sizeof (overlapped)); + overlapped.Offset = low; + overlapped.OffsetHigh = high; + overlapped.hEvent = win_get_syncio_event(); - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); + ret = WriteFile(file, buf, n, NULL, &overlapped); + if (ret) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); - - /* Always do fsync to reduce the probability that when the OS crashes, - a database page is only partially physically written to disk. */ - -# ifdef UNIV_DO_FLUSH - if (!os_do_not_call_flush_at_each_write) { - ut_a(TRUE == os_file_flush(file)); + else if(GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } -# endif /* UNIV_DO_FLUSH */ - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ os_mutex_enter(os_file_count_mutex); os_n_pending_writes--; @@ -3071,9 +3076,6 @@ os_aio_array_create( os_aio_array_t* array; ulint i; os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - OVERLAPPED* over; -#endif ut_a(n > 0); ut_a(n_segments > 0); @@ -3089,23 +3091,11 @@ os_aio_array_create( array->n_segments = n_segments; array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); -#ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); -#endif + for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); - slot->pos = i; slot->reserved = FALSE; -#ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); - - over = &(slot->control); - - over->hEvent = slot->event->handle; - - *((array->native_events) + i) = over->hEvent; -#endif } return(array); @@ -3119,18 +3109,7 @@ os_aio_array_free( /*==============*/ os_aio_array_t* array) /*!< in, own: array to free */ { -#ifdef WIN_ASYNC_IO - ulint i; - for (i = 0; i < array->n_slots; i++) { - os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); - os_event_free(slot->event); - } -#endif /* WIN_ASYNC_IO */ - -#ifdef __WIN__ - ut_free(array->native_events); -#endif /* __WIN__ */ os_mutex_free(array->mutex); os_event_free(array->not_full); os_event_free(array->is_empty); @@ -3209,7 +3188,11 @@ os_aio_init( } os_last_printout = time(NULL); - +#ifdef _WIN32 + ut_a(completion_port == 0); + completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + ut_a(completion_port); +#endif } /*********************************************************************** @@ -3251,11 +3234,10 @@ os_aio_array_wake_win_aio_at_shutdown( /*==================================*/ os_aio_array_t* array) /*!< in: aio array */ { - ulint i; - - for (i = 0; i < array->n_slots; i++) { - - os_event_set((array->slots + i)->event); + if(completion_port) + { + ut_a(CloseHandle(completion_port)); + completion_port = 0; } } #endif @@ -3480,7 +3462,8 @@ found: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); + control->hEvent = 0; + slot->arr = array; #endif os_mutex_exit(array->mutex); @@ -3517,9 +3500,6 @@ os_aio_array_free_slot( os_event_set(array->is_empty); } -#ifdef WIN_ASYNC_IO - os_event_reset(slot->event); -#endif os_mutex_exit(array->mutex); } @@ -3689,12 +3669,8 @@ os_aio( os_aio_array_t* array; os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO - ibool retval; - BOOL ret = TRUE; DWORD len = (DWORD) n; - struct fil_node_struct * dummy_mess1; - void* dummy_mess2; - ulint dummy_type; + BOOL ret; #endif ulint err = 0; ibool retry; @@ -3713,26 +3689,23 @@ os_aio( wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); - if (mode == OS_AIO_SYNC -#ifdef WIN_ASYNC_IO - && !os_aio_use_native_aio -#endif - ) { + if (mode == OS_AIO_SYNC) + { + ibool ret; /* This is actually an ordinary synchronous read or write: - no need to use an i/o-handler thread. NOTE that if we use - Windows async i/o, Windows does not allow us to use - ordinary synchronous os_file_read etc. on the same file, - therefore we have built a special mechanism for synchronous - wait in the Windows case. */ + no need to use an i/o-handler thread */ if (type == OS_FILE_READ) { - return(_os_file_read(file, buf, offset, - offset_high, n, trx)); + ret = _os_file_read(file, buf, offset, + offset_high, n, trx); } + else { + ut_a(type == OS_FILE_WRITE); - ut_a(type == OS_FILE_WRITE); - - return(os_file_write(name, file, buf, offset, offset_high, n)); + ret = os_file_write(name, file, buf, offset, offset_high, n); + } + ut_a(ret); + return ret; } try_again: @@ -3775,6 +3748,8 @@ try_again: ret = ReadFile(file, buf, (DWORD)n, &len, &(slot->control)); + if(!ret && GetLastError() != ERROR_IO_PENDING) + err = 1; #endif } else { if (!wake_later) { @@ -3789,6 +3764,8 @@ try_again: os_n_file_writes++; ret = WriteFile(file, buf, (DWORD)n, &len, &(slot->control)); + if(!ret && GetLastError() != ERROR_IO_PENDING) + err = 1; #endif } else { if (!wake_later) { @@ -3801,34 +3778,7 @@ try_again: ut_error; } -#ifdef WIN_ASYNC_IO - if (os_aio_use_native_aio) { - if ((ret && len == n) - || (!ret && GetLastError() == ERROR_IO_PENDING)) { - /* aio was queued successfully! */ - if (mode == OS_AIO_SYNC) { - /* We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - retval = os_aio_windows_handle(ULINT_UNDEFINED, - slot->pos, - &dummy_mess1, - &dummy_mess2, - &dummy_type, - &space_id); - - return(retval); - } - - return(TRUE); - } - - err = 1; /* Fall through the next if */ - } -#endif if (err == 0) { /* aio was queued successfully! */ @@ -3881,52 +3831,21 @@ os_aio_windows_handle( ulint* space_id) { ulint orig_seg = segment; - os_aio_array_t* array; os_aio_slot_t* slot; - ulint n; - ulint i; ibool ret_val; BOOL ret; DWORD len; BOOL retry = FALSE; + ULONG_PTR dummy_key; - if (segment == ULINT_UNDEFINED) { - array = os_aio_sync_array; - segment = 0; - } else { - segment = os_aio_get_array_and_local_segment(&array, segment); + + ret = GetQueuedCompletionStatus(completion_port, &len, &dummy_key, + (OVERLAPPED **)&slot, INFINITE); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + os_thread_exit(NULL); } - /* NOTE! We only access constant fields in os_aio_array. Therefore - we do not have to acquire the protecting mutex yet */ - - ut_ad(os_aio_validate()); - ut_ad(segment < array->n_segments); - - n = array->n_slots; - - if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); - i = pos; - } else { - srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - ); - } - - os_mutex_enter(array->mutex); - - slot = os_aio_array_get_nth_slot(array, i); - - ut_a(slot->reserved); - - if (orig_seg != ULINT_UNDEFINED) { - srv_set_io_thread_op_info(orig_seg, - "get windows aio return value"); - } - - ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); *message1 = slot->message1; *message2 = slot->message2; @@ -3951,8 +3870,6 @@ os_aio_windows_handle( ret_val = FALSE; } - os_mutex_exit(array->mutex); - if (retry) { /* retry failed read/write operation synchronously. No need to hold array->mutex. */ @@ -3961,37 +3878,19 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret = WriteFile(slot->file, slot->buf, - (DWORD) slot->len, &len, - &(slot->control)); - + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); break; case OS_FILE_READ: - ret = ReadFile(slot->file, slot->buf, - (DWORD) slot->len, &len, - &(slot->control)); - + ret_val = os_file_read(slot->file, slot->buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); break; default: ut_error; } - - if (!ret && GetLastError() == ERROR_IO_PENDING) { - /* aio was queued successfully! - We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - ret = GetOverlappedResult(slot->file, - &(slot->control), - &len, TRUE); - } - - ret_val = ret && len == slot->len; } - os_aio_array_free_slot(array, slot); + os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); return(ret_val); } diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index cef045d72e1..60ad0904633 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1274,13 +1274,7 @@ innobase_start_or_create_for_mysql(void) break; default: /* On Win 2000 and XP use async i/o */ - //os_aio_use_native_aio = TRUE; - os_aio_use_native_aio = FALSE; - fprintf(stderr, - "InnoDB: Windows native async i/o is disabled as default.\n" - "InnoDB: It is not applicable for the current" - " multi io threads implementation.\n"); - break; + os_aio_use_native_aio = TRUE; } #endif if (srv_file_flush_method_str == NULL) { @@ -1320,11 +1314,6 @@ innobase_start_or_create_for_mysql(void) "async_unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = TRUE; - srv_n_read_io_threads = srv_n_write_io_threads = 1; - fprintf(stderr, - "InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n" - "InnoDB: Windows native async i/o is enabled.\n" - "InnoDB: And io threads are restricted.\n"); #endif } else { fprintf(stderr, From fe054adfcaecbd891da802517826a951bb4ca377 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:09:28 +0200 Subject: [PATCH 015/359] Backport fix for MySQL bug #56405 : use native windows condition variables and rwlocks in mysys, if Windows supports it. --- include/my_pthread.h | 76 +++++-- mysys/my_wincond.c | 232 +++++++++++++++++---- mysys/my_winthread.c | 15 +- mysys/thr_rwlock.c | 160 ++++++++++++++ storage/innodb_plugin/handler/ha_innodb.cc | 22 ++ storage/pbxt/src/pthread_xt.cc | 43 +--- 6 files changed, 450 insertions(+), 98 deletions(-) diff --git a/include/my_pthread.h b/include/my_pthread.h index fffb883912a..aa9eb8fc807 100644 --- a/include/my_pthread.h +++ b/include/my_pthread.h @@ -48,19 +48,30 @@ typedef struct st_pthread_link { struct st_pthread_link *next; } pthread_link; -typedef struct { - uint32 waiting; - CRITICAL_SECTION lock_waiting; - - enum { - SIGNAL= 0, - BROADCAST= 1, - MAX_EVENTS= 2 - } EVENTS; - - HANDLE events[MAX_EVENTS]; - HANDLE broadcast_block_event; +/** + Implementation of Windows condition variables. + We use native conditions on Vista and later, and fallback to own + implementation on earlier OS version. +*/ +typedef union +{ + /* Native condition (used on Vista and later) */ + CONDITION_VARIABLE native_cond; + /* Own implementation (used on XP) */ + struct + { + uint32 waiting; + CRITICAL_SECTION lock_waiting; + enum + { + SIGNAL= 0, + BROADCAST= 1, + MAX_EVENTS= 2 + } EVENTS; + HANDLE events[MAX_EVENTS]; + HANDLE broadcast_block_event; + }; } pthread_cond_t; @@ -632,6 +643,45 @@ int my_pthread_fastmutex_lock(my_pthread_fastmutex_t *mp); #endif #define my_rwlock_init(A,B) rwlock_init((A),USYNC_THREAD,0) #else +#ifdef _WIN32 +/** + Implementation of Windows rwlock. + + We use native (slim) rwlocks on Win7 and later, and fallback to portable + implementation on earlier Windows. + + slim rwlock are also available on Vista/WS2008, but we do not use it + ("trylock" APIs are missing on Vista) +*/ +typedef union +{ + /* Native rwlock (is_srwlock == TRUE) */ + struct + { + SRWLOCK srwlock; /* native reader writer lock */ + BOOL have_exclusive_srwlock; /* used for unlock */ + }; + + /* + Portable implementation (is_srwlock == FALSE) + Fields are identical with Unix my_rw_lock_t fields. + */ + struct + { + pthread_mutex_t lock; /* lock for structure */ + pthread_cond_t readers; /* waiting readers */ + pthread_cond_t writers; /* waiting writers */ + int state; /* -1:writer,0:free,>0:readers */ + int waiters; /* number of waiting writers */ +#ifdef SAFE_MUTEX + pthread_t write_thread; +#endif + }; +} my_rw_lock_t; + + +#else /* _WIN32 */ + /* Use our own version of read/write locks */ typedef struct _my_rw_lock_t { pthread_mutex_t lock; /* lock for structure */ @@ -641,6 +691,8 @@ typedef struct _my_rw_lock_t { int waiters; /* number of waiting writers */ } my_rw_lock_t; +#endif /* _WIN32 */ + #define rw_lock_t my_rw_lock_t #define rw_rdlock(A) my_rw_rdlock((A)) #define rw_wrlock(A) my_rw_wrlock((A)) diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c index b869b22bdea..13bf0b78766 100644 --- a/mysys/my_wincond.c +++ b/mysys/my_wincond.c @@ -26,7 +26,108 @@ #include #include -int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) + +/* + Windows native condition variables. We use runtime loading / function + pointers, because they are not available on XP +*/ + +/* Prototypes and function pointers for condition variable functions */ +typedef VOID (WINAPI * InitializeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +typedef BOOL (WINAPI * SleepConditionVariableCSProc) + (PCONDITION_VARIABLE ConditionVariable, + PCRITICAL_SECTION CriticalSection, + DWORD dwMilliseconds); + +typedef VOID (WINAPI * WakeAllConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +typedef VOID (WINAPI * WakeConditionVariableProc) + (PCONDITION_VARIABLE ConditionVariable); + +static InitializeConditionVariableProc my_InitializeConditionVariable; +static SleepConditionVariableCSProc my_SleepConditionVariableCS; +static WakeAllConditionVariableProc my_WakeAllConditionVariable; +static WakeConditionVariableProc my_WakeConditionVariable; + + +/** + Indicates if we have native condition variables, + initialized first time pthread_cond_init is called. +*/ + +static BOOL have_native_conditions= FALSE; + + +/** + Check if native conditions can be used, load function pointers +*/ + +static void check_native_cond_availability(void) +{ + HMODULE module= GetModuleHandle("kernel32"); + + my_InitializeConditionVariable= (InitializeConditionVariableProc) + GetProcAddress(module, "InitializeConditionVariable"); + my_SleepConditionVariableCS= (SleepConditionVariableCSProc) + GetProcAddress(module, "SleepConditionVariableCS"); + my_WakeAllConditionVariable= (WakeAllConditionVariableProc) + GetProcAddress(module, "WakeAllConditionVariable"); + my_WakeConditionVariable= (WakeConditionVariableProc) + GetProcAddress(module, "WakeConditionVariable"); + + if (my_InitializeConditionVariable) + have_native_conditions= TRUE; +} + + + +/** + Convert abstime to milliseconds +*/ + +static DWORD get_milliseconds(const struct timespec *abstime) +{ + long long millis; + union ft64 now; + + if (abstime == NULL) + return INFINITE; + + GetSystemTimeAsFileTime(&now.ft); + + /* + Calculate time left to abstime + - subtract start time from current time(values are in 100ns units) + - convert to millisec by dividing with 10000 + */ + millis= (abstime->tv.i64 - now.i64) / 10000; + + /* Don't allow the timeout to be negative */ + if (millis < 0) + return 0; + + /* + Make sure the calculated timeout does not exceed original timeout + value which could cause "wait for ever" if system time changes + */ + if (millis > abstime->max_timeout_msec) + millis= abstime->max_timeout_msec; + + if (millis > UINT_MAX) + millis= UINT_MAX; + + return (DWORD)millis; +} + + +/* + Old (pre-vista) implementation using events +*/ + +static int legacy_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) { cond->waiting= 0; InitializeCriticalSection(&cond->lock_waiting); @@ -55,7 +156,8 @@ int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) return 0; } -int pthread_cond_destroy(pthread_cond_t *cond) + +static int legacy_cond_destroy(pthread_cond_t *cond) { DeleteCriticalSection(&cond->lock_waiting); @@ -67,48 +169,13 @@ int pthread_cond_destroy(pthread_cond_t *cond) } -int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) -{ - return pthread_cond_timedwait(cond,mutex,NULL); -} - - -int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, +static int legacy_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, struct timespec *abstime) { int result; - long timeout; - union ft64 now; - - if( abstime != NULL ) - { - GetSystemTimeAsFileTime(&now.ft); - - /* - Calculate time left to abstime - - subtract start time from current time(values are in 100ns units) - - convert to millisec by dividing with 10000 - */ - timeout= (long)((abstime->tv.i64 - now.i64) / 10000); - - /* Don't allow the timeout to be negative */ - if (timeout < 0) - timeout= 0L; - - /* - Make sure the calucated timeout does not exceed original timeout - value which could cause "wait for ever" if system time changes - */ - if (timeout > abstime->max_timeout_msec) - timeout= abstime->max_timeout_msec; - - } - else - { - /* No time specified; don't expire */ - timeout= INFINITE; - } + DWORD timeout; + timeout= get_milliseconds(abstime); /* Block access if previous broadcast hasn't finished. This is just for safety and should normally not @@ -144,7 +211,7 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, return result == WAIT_TIMEOUT ? ETIMEDOUT : 0; } -int pthread_cond_signal(pthread_cond_t *cond) +static int legacy_cond_signal(pthread_cond_t *cond) { EnterCriticalSection(&cond->lock_waiting); @@ -157,7 +224,7 @@ int pthread_cond_signal(pthread_cond_t *cond) } -int pthread_cond_broadcast(pthread_cond_t *cond) +static int legacy_cond_broadcast(pthread_cond_t *cond) { EnterCriticalSection(&cond->lock_waiting); /* @@ -179,6 +246,87 @@ int pthread_cond_broadcast(pthread_cond_t *cond) } +/* + Posix API functions. Just choose between native and legacy implementation. +*/ + +int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) +{ + /* + Once initialization is used here rather than in my_init(), to + 1) avoid my_init() pitfalls- undefined order in which initialization should + run + 2) be potentially useful C++ (in static constructors that run before main()) + 3) just to simplify the API. + Also, the overhead of my_pthread_once is very small. + */ + static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT; + my_pthread_once(&once_control, check_native_cond_availability); + + if (have_native_conditions) + { + my_InitializeConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_init(cond, attr); +} + + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + if (have_native_conditions) + return 0; /* no destroy function */ + else + return legacy_cond_destroy(cond); +} + + +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + if (have_native_conditions) + { + my_WakeAllConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_broadcast(cond); +} + + +int pthread_cond_signal(pthread_cond_t *cond) +{ + if (have_native_conditions) + { + my_WakeConditionVariable(&cond->native_cond); + return 0; + } + else + return legacy_cond_signal(cond); +} + + +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + struct timespec *abstime) +{ + if (have_native_conditions) + { + DWORD timeout= get_milliseconds(abstime); + if (!my_SleepConditionVariableCS(&cond->native_cond, mutex, timeout)) + return ETIMEDOUT; + return 0; + } + else + return legacy_cond_timedwait(cond, mutex, abstime); +} + + +int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + return pthread_cond_timedwait(cond, mutex, NULL); +} + + int pthread_attr_init(pthread_attr_t *connect_att) { connect_att->dwStackSize = 0; diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c index 6adf24ef543..5fbad39597c 100644 --- a/mysys/my_winthread.c +++ b/mysys/my_winthread.c @@ -156,8 +156,19 @@ int win_pthread_setspecific(void *a,void *b,uint length) int my_pthread_once(my_pthread_once_t *once_control, void (*init_routine)(void)) { - LONG state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS, - MY_PTHREAD_ONCE_INIT); + LONG state; + + /* + Do "dirty" read to find out if initialization is already done, to + save an interlocked operation in common case. Memory barriers are ensured by + Visual C++ volatile implementation. + */ + if (*once_control == MY_PTHREAD_ONCE_DONE) + return 0; + + state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS, + MY_PTHREAD_ONCE_INIT); + switch(state) { case MY_PTHREAD_ONCE_INIT: diff --git a/mysys/thr_rwlock.c b/mysys/thr_rwlock.c index ea98a854a4d..2978d91090d 100644 --- a/mysys/thr_rwlock.c +++ b/mysys/thr_rwlock.c @@ -19,6 +19,119 @@ #if defined(THREAD) && !defined(HAVE_PTHREAD_RWLOCK_RDLOCK) && !defined(HAVE_RWLOCK_INIT) #include +#ifdef _WIN32 + +static BOOL have_srwlock= FALSE; +/* Prototypes and function pointers for windows functions */ +typedef VOID (WINAPI* srw_func) (PSRWLOCK SRWLock); +typedef BOOL (WINAPI* srw_bool_func) (PSRWLOCK SRWLock); + +static srw_func my_InitializeSRWLock; +static srw_func my_AcquireSRWLockExclusive; +static srw_func my_ReleaseSRWLockExclusive; +static srw_func my_AcquireSRWLockShared; +static srw_func my_ReleaseSRWLockShared; + +static srw_bool_func my_TryAcquireSRWLockExclusive; +static srw_bool_func my_TryAcquireSRWLockShared; + +/** + Check for presence of Windows slim reader writer lock function. + Load function pointers. +*/ + +static void check_srwlock_availability(void) +{ + HMODULE module= GetModuleHandle("kernel32"); + + my_InitializeSRWLock= (srw_func) GetProcAddress(module, + "InitializeSRWLock"); + my_AcquireSRWLockExclusive= (srw_func) GetProcAddress(module, + "AcquireSRWLockExclusive"); + my_AcquireSRWLockShared= (srw_func) GetProcAddress(module, + "AcquireSRWLockShared"); + my_ReleaseSRWLockExclusive= (srw_func) GetProcAddress(module, + "ReleaseSRWLockExclusive"); + my_ReleaseSRWLockShared= (srw_func) GetProcAddress(module, + "ReleaseSRWLockShared"); + my_TryAcquireSRWLockExclusive= (srw_bool_func) GetProcAddress(module, + "TryAcquireSRWLockExclusive"); + my_TryAcquireSRWLockShared= (srw_bool_func) GetProcAddress(module, + "TryAcquireSRWLockShared"); + + /* + We currently require TryAcquireSRWLockExclusive. This API is missing on + Vista, this means SRWLock are only used starting with Win7. + + If "trylock" usage for rwlocks is eliminated from server codebase (it is used + in a single place currently, in query cache), then SRWLock can be enabled on + Vista too. In this case condition below needs to be changed to e.g check + for my_InitializeSRWLock. + */ + + if (my_TryAcquireSRWLockExclusive) + have_srwlock= TRUE; + +} + + +static int srw_init(my_rw_lock_t *rwp) +{ + my_InitializeSRWLock(&rwp->srwlock); + rwp->have_exclusive_srwlock = FALSE; + return 0; +} + + +static int srw_rdlock(my_rw_lock_t *rwp) +{ + my_AcquireSRWLockShared(&rwp->srwlock); + return 0; +} + + +static int srw_tryrdlock(my_rw_lock_t *rwp) +{ + + if (!my_TryAcquireSRWLockShared(&rwp->srwlock)) + return EBUSY; + return 0; +} + + +static int srw_wrlock(my_rw_lock_t *rwp) +{ + my_AcquireSRWLockExclusive(&rwp->srwlock); + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + + +static int srw_trywrlock(my_rw_lock_t *rwp) +{ + if (!my_TryAcquireSRWLockExclusive(&rwp->srwlock)) + return EBUSY; + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + + +static int srw_unlock(my_rw_lock_t *rwp) +{ + if (rwp->have_exclusive_srwlock) + { + rwp->have_exclusive_srwlock= FALSE; + my_ReleaseSRWLockExclusive(&rwp->srwlock); + } + else + { + my_ReleaseSRWLockShared(&rwp->srwlock); + } + return 0; +} + +#endif /*_WIN32 */ + /* Source base from Sun Microsystems SPILT, simplified for MySQL use -- Joshua Chamas @@ -62,6 +175,22 @@ int my_rwlock_init(rw_lock_t *rwp, void *arg __attribute__((unused))) { pthread_condattr_t cond_attr; +#ifdef _WIN32 + /* + Once initialization is used here rather than in my_init(), in order to + - avoid my_init() pitfalls- (undefined order in which initialization should + run) + - be potentially useful C++ (static constructors) + - just to simplify the API. + Also, the overhead is of my_pthread_once is very small. + */ + static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT; + my_pthread_once(&once_control, check_srwlock_availability); + + if (have_srwlock) + return srw_init(rwp); +#endif + pthread_mutex_init( &rwp->lock, MY_MUTEX_INIT_FAST); pthread_condattr_init( &cond_attr ); pthread_cond_init( &rwp->readers, &cond_attr ); @@ -77,6 +206,10 @@ int my_rwlock_init(rw_lock_t *rwp, void *arg __attribute__((unused))) int my_rwlock_destroy(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return 0; /* no destroy function */ +#endif pthread_mutex_destroy( &rwp->lock ); pthread_cond_destroy( &rwp->readers ); pthread_cond_destroy( &rwp->writers ); @@ -86,6 +219,11 @@ int my_rwlock_destroy(rw_lock_t *rwp) int my_rw_rdlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_rdlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); /* active or queued writers */ @@ -100,6 +238,12 @@ int my_rw_rdlock(rw_lock_t *rwp) int my_rw_tryrdlock(rw_lock_t *rwp) { int res; + +#ifdef _WIN32 + if (have_srwlock) + return srw_tryrdlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); if ((rwp->state < 0 ) || rwp->waiters) res= EBUSY; /* Can't get lock */ @@ -115,6 +259,11 @@ int my_rw_tryrdlock(rw_lock_t *rwp) int my_rw_wrlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_wrlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); rwp->waiters++; /* another writer queued */ @@ -130,6 +279,12 @@ int my_rw_wrlock(rw_lock_t *rwp) int my_rw_trywrlock(rw_lock_t *rwp) { int res; + +#ifdef _WIN32 + if (have_srwlock) + return srw_trywrlock(rwp); +#endif + pthread_mutex_lock(&rwp->lock); if (rwp->state) res= EBUSY; /* Can't get lock */ @@ -145,6 +300,11 @@ int my_rw_trywrlock(rw_lock_t *rwp) int my_rw_unlock(rw_lock_t *rwp) { +#ifdef _WIN32 + if (have_srwlock) + return srw_unlock(rwp); +#endif + DBUG_PRINT("rw_unlock", ("state: %d waiters: %d", rwp->state, rwp->waiters)); pthread_mutex_lock(&rwp->lock); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index a9b81116a90..e0c7d2081f0 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -1172,7 +1172,29 @@ innobase_mysql_tmpfile(void) will be passed to fdopen(), it will be closed by invoking fclose(), which in turn will invoke close() instead of my_close(). */ + +#ifdef _WIN32 + /* Note that on Windows, the integer returned by mysql_tmpfile + has no relation to C runtime file descriptor. Here, we need + to call my_get_osfhandle to get the HANDLE and then convert it + to C runtime filedescriptor. */ + { + HANDLE hFile = my_get_osfhandle(fd); + HANDLE hDup; + BOOL bOK = + DuplicateHandle(GetCurrentProcess(), hFile, GetCurrentProcess(), + &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + if(bOK) { + fd2 = _open_osfhandle((intptr_t)hDup,0); + } + else { + my_osmaperr(GetLastError()); + fd2 = -1; + } + } +#else fd2 = dup(fd); +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; diff --git a/storage/pbxt/src/pthread_xt.cc b/storage/pbxt/src/pthread_xt.cc index c5dc2e41fdd..284672d2c79 100755 --- a/storage/pbxt/src/pthread_xt.cc +++ b/storage/pbxt/src/pthread_xt.cc @@ -396,48 +396,7 @@ xtPublic int xt_p_cond_wait(xt_cond_type *cond, xt_mutex_type *mutex) xtPublic int xt_p_cond_timedwait(xt_cond_type *cond, xt_mutex_type *mt, struct timespec *abstime) { - pthread_mutex_t *mutex = &mt->mt_cs; - int result; - long timeout; - union ft64 now; - - if (abstime != NULL) { - GetSystemTimeAsFileTime(&now.ft); - - timeout = (long)((abstime->tv.i64 - now.i64) / 10000); - if (timeout < 0) - timeout = 0L; - if (timeout > abstime->max_timeout_msec) - timeout = abstime->max_timeout_msec; - } - else - timeout= INFINITE; - - WaitForSingleObject(cond->broadcast_block_event, INFINITE); - - EnterCriticalSection(&cond->lock_waiting); - cond->waiting++; - LeaveCriticalSection(&cond->lock_waiting); - - LeaveCriticalSection(mutex); - - result= WaitForMultipleObjects(2, cond->events, FALSE, timeout); - - EnterCriticalSection(&cond->lock_waiting); - cond->waiting--; - - if (cond->waiting == 0) { - /* The last waiter must reset the broadcast - * state (whther there was a broadcast or not)! - */ - ResetEvent(cond->events[xt_cond_type::BROADCAST]); - SetEvent(cond->broadcast_block_event); - } - LeaveCriticalSection(&cond->lock_waiting); - - EnterCriticalSection(mutex); - - return result == WAIT_TIMEOUT ? ETIMEDOUT : 0; + return pthread_cond_timedwait(cond, &(mt->mt_cs), abstime); } xtPublic int xt_p_join(pthread_t thread, void **value) From 8cde9162460cc2c3a7eddb972c6aa766f89d4965 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sun, 12 Jun 2011 16:44:41 +0200 Subject: [PATCH 016/359] fix mismerge --- storage/xtradb/os/os0file.c | 27 +++++++++++---------------- storage/xtradb/srv/srv0start.c | 4 +--- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index f0547f2e243..edf4e5d2744 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -2445,8 +2445,7 @@ _os_file_read( DWORD len; ibool retry; OVERLAPPED overlapped; - overlapped.Offset = (DWORD)offset; - overlapped.OffsetHigh = (DWORD)offset_high; + /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2466,8 +2465,8 @@ try_again: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; overlapped.hEvent = win_get_syncio_event(); ret = ReadFile(file, buf, n, NULL, &overlapped); if (ret) { @@ -2475,7 +2474,7 @@ try_again: } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - + } os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); @@ -2568,8 +2567,8 @@ try_again: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; overlapped.hEvent = win_get_syncio_event(); ret = ReadFile(file, buf, n, NULL, &overlapped); if (ret) { @@ -2577,7 +2576,7 @@ try_again: } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - + } os_mutex_enter(os_file_count_mutex); os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); @@ -2654,8 +2653,6 @@ os_file_write( ulint n_retries = 0; ulint err; OVERLAPPED overlapped; - overlapped.Offset = (DWORD)offset; - overlapped.OffsetHigh = (DWORD)offset_high; /* On 64-bit Windows, ulint is 64 bits. But offset and n should be no more than 32 bits. */ @@ -2674,12 +2671,14 @@ retry: os_mutex_exit(os_file_count_mutex); memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = low; - overlapped.OffsetHigh = high; + overlapped.Offset = (DWORD)offset; + overlapped.OffsetHigh = (DWORD)offset_high; + overlapped.hEvent = win_get_syncio_event(); ret = WriteFile(file, buf, n, NULL, &overlapped); if (ret) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); + } else if(GetLastError() == ERROR_IO_PENDING) { ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); } @@ -3831,10 +3830,6 @@ os_aio_windows_handle( BOOL retry = FALSE; ULONG_PTR dummy_key; - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - - ret = GetQueuedCompletionStatus(completion_port, &len, &dummy_key, (OVERLAPPED **)&slot, INFINITE); diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index e235bab3e02..bb1beb06ef5 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1284,10 +1284,8 @@ innobase_start_or_create_for_mysql(void) default: os_aio_use_native_aio = TRUE; srv_use_native_conditions = TRUE; - default: - /* On Win 2000 and XP use async i/o */ - os_aio_use_native_aio = TRUE; } +#endif if (srv_file_flush_method_str == NULL) { /* These are the default options */ From 06490b208acdf56b1999617f7fd7d7ec2613f059 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 13 Jun 2011 02:38:16 +0200 Subject: [PATCH 017/359] fix warnings --- client/mysqlslap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/client/mysqlslap.c b/client/mysqlslap.c index de1992e2d57..a876390db27 100644 --- a/client/mysqlslap.c +++ b/client/mysqlslap.c @@ -1382,9 +1382,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open create file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); parse_delimiter(tmp_string, &create_statements, delimiter[0]); @@ -1409,9 +1409,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_query) @@ -1440,9 +1440,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_pre_statements) @@ -1471,9 +1471,9 @@ get_options(int *argc,char ***argv) fprintf(stderr,"%s: Could not open query supplied file\n", my_progname); exit(1); } - tmp_string= (char *)my_malloc(sbuf.st_size + 1, + tmp_string= (char *)my_malloc((size_t)sbuf.st_size + 1, MYF(MY_ZEROFILL|MY_FAE|MY_WME)); - my_read(data_file, (uchar*) tmp_string, sbuf.st_size, MYF(0)); + my_read(data_file, (uchar*) tmp_string, (size_t)sbuf.st_size, MYF(0)); tmp_string[sbuf.st_size]= '\0'; my_close(data_file,MYF(0)); if (user_supplied_post_statements) From bf0e1f44c6b3306e84bbfd783c898a1764836242 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 13 Jun 2011 12:46:11 +0300 Subject: [PATCH 018/359] Fixed portability problem with partiton_error.test Added option to aria_read_log to crash recovery at certain points in the recovery process. Fixed bug that caused future recovery attempts to fail if we got a crash/got killed during closing of tables at end of recovery process. mysql-test/mysql-test-run.pl: Don't abort if 'var' points to stale place; Just remove it. mysql-test/suite/maria/r/maria.result: Fixed wrong indentation mysql-test/t/partition_error.test: Fixed portability problem with partiton_error.test storage/maria/ma_close.c: More DBUG_PRINT info storage/maria/ma_pagecache.c: Copy flush_log_callback when writing to page cache. This fixes problem in recovery when switching from mode of file storage/maria/ma_recovery.c: Added option to aria_read_log to crash recovery at certain points in the recovery process. storage/maria/ma_recovery.h: Added option to aria_read_log to crash recovery at certain points in the recovery process. storage/maria/maria_chk.c: Align aria_chk -d output Don't write warning Aria table '...' is usable but should be fixed if the table was before marked as crashed but now is ok storage/maria/maria_read_log.c: Added option to aria_read_log to crash recovery at certain points in the recovery process. --- mysql-test/mysql-test-run.pl | 15 ++++++++++----- mysql-test/suite/maria/r/maria.result | 2 +- mysql-test/t/partition_error.test | 5 ++--- storage/maria/ma_close.c | 3 ++- storage/maria/ma_pagecache.c | 9 ++++++++- storage/maria/ma_recovery.c | 15 ++++++++++++++- storage/maria/ma_recovery.h | 1 + storage/maria/maria_chk.c | 18 +++++++++++++++--- storage/maria/maria_read_log.c | 5 ++++- 9 files changed, 57 insertions(+), 16 deletions(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 8d11abc8a37..38042d1f272 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2345,9 +2345,11 @@ sub remove_stale_vardir () { mtr_report(" - WARNING: Using the 'mysql-test/var' symlink"); # Make sure the directory where it points exist - mtr_error("The destination for symlink $opt_vardir does not exist") - if ! -d readlink($opt_vardir); - + if (! -d readlink($opt_vardir)) + { + mtr_report("The destination for symlink $opt_vardir does not exist; Removing it and creating a new var directory"); + unlink($opt_vardir); + } foreach my $bin ( glob("$opt_vardir/*") ) { mtr_verbose("Removing bin $bin"); @@ -2414,8 +2416,11 @@ sub setup_vardir() { # it's a symlink # Make sure the directory where it points exist - mtr_error("The destination for symlink $opt_vardir does not exist") - if ! -d readlink($opt_vardir); + if (! -d readlink($opt_vardir)) + { + mtr_report("The destination for symlink $opt_vardir does not exist; Removing it and creating a new var directory"); + unlink($opt_vardir); + } } elsif ( $opt_mem ) { diff --git a/mysql-test/suite/maria/r/maria.result b/mysql-test/suite/maria/r/maria.result index f7ce8efaa46..217955310f9 100644 --- a/mysql-test/suite/maria/r/maria.result +++ b/mysql-test/suite/maria/r/maria.result @@ -2131,7 +2131,7 @@ c3 VARCHAR(10) NOT NULL, KEY (c1), KEY (c2) ) ENGINE=aria DEFAULT CHARSET=utf8 PACK_KEYS=0; -Aria file: MYSQLD_DATADIR/test/t1 +Aria file: MYSQLD_DATADIR/test/t1 Record format: Block Crashsafe: yes Character set: utf8_general_ci (33) diff --git a/mysql-test/t/partition_error.test b/mysql-test/t/partition_error.test index 81bb7b55cec..9db3ff93634 100644 --- a/mysql-test/t/partition_error.test +++ b/mysql-test/t/partition_error.test @@ -695,14 +695,13 @@ CREATE TABLE t1 (a INT) PARTITION BY HASH (a); FLUSH TABLES; --remove_file $MYSQLD_DATADIR/test/t1.par --replace_result $MYSQLD_DATADIR ./ ---replace_result \\ / CHECK TABLE t1; ---replace_result \\ / +--replace_result $MYSQLD_DATADIR ./ --error 29 SELECT * FROM t1; --echo # Note that it is currently impossible to drop a partitioned table --echo # without the .par file ---replace_result \\ / +--replace_result $MYSQLD_DATADIR ./ --error 29 DROP TABLE t1; --remove_file $MYSQLD_DATADIR/test/t1.frm diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index e97664ebe42..2427dfd042d 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -28,7 +28,8 @@ int maria_close(register MARIA_HA *info) my_bool share_can_be_freed= FALSE; MARIA_SHARE *share= info->s; DBUG_ENTER("maria_close"); - DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", + DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u", + share->open_file_name.str, (long) info, (uint) share->reopen, (uint) share->tot_locks)); diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 0e44b0e7a22..8272aa970c5 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -1808,7 +1808,14 @@ restart: hash_link->requests++; DBUG_ASSERT(hash_link->block == 0); } - + else + { + /* + We have to copy the flush_log callback, as it may change if the table + goes from non_transactional to transactional during recovery + */ + hash_link->file.flush_log_callback= file->flush_log_callback; + } return hash_link; } diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 5be30dee0b5..8752f70668c 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -60,6 +60,7 @@ static int (*save_error_handler_hook)(uint, const char *,myf); static uint recovery_warnings; /**< count of warnings */ static uint recovery_found_crashed_tables; HASH tables_to_redo; /* For maria_read_log */ +ulong maria_recovery_force_crash_counter; #define prototype_redo_exec_hook(R) \ static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) @@ -2939,6 +2940,12 @@ static int run_undo_phase(uint uncommitted) translog_free_record_header(&rec); } + /* Force a crash to test recovery of recovery */ + if (maria_recovery_force_crash_counter) + { + DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); + } + if (trnman_rollback_trn(trn)) DBUG_RETURN(1); /* We could want to span a few threads (4?) instead of 1 */ @@ -3436,6 +3443,12 @@ static int close_all_tables(void) prepare_table_for_close(info, addr); error|= maria_close(info); pthread_mutex_lock(&THR_LOCK_maria); + + /* Force a crash to test recovery of recovery */ + if (maria_recovery_force_crash_counter) + { + DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); + } } end: pthread_mutex_unlock(&THR_LOCK_maria); @@ -3510,7 +3523,7 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, /* Reset state pointers. This is needed as in ALTER table we may do - commit fllowed by _ma_renable_logging_for_table and then + commit followed by _ma_renable_logging_for_table and then info->state may point to a state that was deleted by _ma_trnman_end_trans_hook() */ diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index 5b22c4fd9b2..45dba0e86b3 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -32,4 +32,5 @@ int maria_apply_log(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply, my_bool take_checkpoints, uint *warnings_count); /* Table of tables to recover */ extern HASH tables_to_redo; +extern ulong maria_recovery_force_crash_counter; C_MODE_END diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index bb345f63b02..ec8af652fe3 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -949,6 +949,7 @@ static void get_options(register int *argc,register char ***argv) static int maria_chk(HA_CHECK *param, char *filename) { int error,lock_type,recreate; + uint warning_printed_by_chk_status; my_bool rep_quick= test(param->testflag & (T_QUICK | T_FORCE_UNIQUENESS)); MARIA_HA *info; File datafile; @@ -961,6 +962,7 @@ static int maria_chk(HA_CHECK *param, char *filename) recreate=0; datafile=0; param->isam_file_name=filename; /* For error messages */ + warning_printed_by_chk_status= 0; if (!(info=maria_open(filename, (param->testflag & (T_DESCRIPT | T_READONLY)) ? O_RDONLY : O_RDWR, @@ -1303,7 +1305,12 @@ static int maria_chk(HA_CHECK *param, char *filename) maria_chk_init_for_check(param, info); if (opt_warning_for_wrong_transid == 0) param->max_trid= ~ (ulonglong) 0; + error= maria_chk_status(param,info); + /* Forget warning printed by maria_chk_status if no problems found */ + warning_printed_by_chk_status= param->warning_printed; + param->warning_printed= 0; + maria_intersect_keys_active(share->state.key_map, param->keys_in_use); error|= maria_chk_size(param,info); if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) @@ -1371,8 +1378,12 @@ static int maria_chk(HA_CHECK *param, char *filename) (state_updated ? UPDATE_STAT : 0) | ((param->testflag & T_SORT_RECORDS) ? UPDATE_SORT : 0))); - if (!(param->testflag & T_SILENT)) + if (warning_printed_by_chk_status) + _ma_check_print_info(param, "Aria table '%s' was ok. Status updated", + filename); + else if (!(param->testflag & T_SILENT)) printf("State updated\n"); + warning_printed_by_chk_status= 0; } info->update&= ~HA_STATE_CHANGED; _ma_reenable_logging_for_table(info, FALSE); @@ -1426,7 +1437,7 @@ end2: "Aria table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", filename)); } - else if (param->warning_printed && + else if ((param->warning_printed || warning_printed_by_chk_status) && ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) { @@ -1435,6 +1446,7 @@ end2: VOID(fprintf(stderr, "Aria table '%s' is usable but should be fixed\n", filename)); } + VOID(fflush(stderr)); DBUG_RETURN(error); } /* maria_chk */ @@ -1464,7 +1476,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) DBUG_VOID_RETURN; } - printf("Aria file: %s\n",name); + printf("Aria file: %s\n",name); printf("Record format: %s\n", record_formats[share->data_file_type]); printf("Crashsafe: %s\n", share->base.born_transactional ? "yes" : "no"); diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index b335aa3692e..9471f01dbc8 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -166,7 +166,7 @@ err: #include "ma_check_standalone.h" enum options_mc { - OPT_CHARSETS_DIR=256 + OPT_CHARSETS_DIR=256, OPT_FORCE_CRASH }; static struct my_option my_long_options[] = @@ -186,6 +186,9 @@ static struct my_option my_long_options[] = #ifndef DBUG_OFF {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"force-crash", OPT_FORCE_CRASH, "Force crash after # recovery events", + &maria_recovery_force_crash_counter, 0,0, GET_ULONG, REQUIRED_ARG, + 0, 0, ~(long) 0, 0, 0, 0}, #endif {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, From 61578756c215247580be96bbd1f930d69ad639e8 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 13 Jun 2011 14:07:44 +0300 Subject: [PATCH 019/359] Added test case to show that we get a warning from CHECK TABLE if we force auto_increment value to 0 --- mysql-test/suite/maria/r/maria.result | 11 +++++++++++ mysql-test/suite/maria/t/maria.test | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/mysql-test/suite/maria/r/maria.result b/mysql-test/suite/maria/r/maria.result index e0f1175d845..077aaf074c5 100644 --- a/mysql-test/suite/maria/r/maria.result +++ b/mysql-test/suite/maria/r/maria.result @@ -22,6 +22,17 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; +create table t1 (a int primary key auto_increment) engine=aria; +insert into t1 values (1); +update t1 set a=0 where a=1; +check table t1; +Table Op Msg_type Msg_text +test.t1 check warning Found row where the auto_increment column has the value 0 +test.t1 check status OK +select * from t1; +a +0 +drop table t1; create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); check table t1; Table Op Msg_type Msg_text diff --git a/mysql-test/suite/maria/t/maria.test b/mysql-test/suite/maria/t/maria.test index e23821b8be9..40738525372 100644 --- a/mysql-test/suite/maria/t/maria.test +++ b/mysql-test/suite/maria/t/maria.test @@ -41,6 +41,16 @@ INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW CHECK TABLE t1; drop table t1; +# +# Test auto_increment warning +# +create table t1 (a int primary key auto_increment) engine=aria; +insert into t1 values (1); +update t1 set a=0 where a=1; +check table t1; +select * from t1; +drop table t1; + # # Test problem with rows that are 65517-65520 bytes long # From 53270955bb38eebc1a4f0b19810817a0543b8f33 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 13 Jun 2011 16:57:11 +0300 Subject: [PATCH 020/359] Change in PBXT to only use pth_set_priority() (not setpriority()) to set priority --- storage/pbxt/src/pthread_xt.cc | 39 ++++++++-------------------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/storage/pbxt/src/pthread_xt.cc b/storage/pbxt/src/pthread_xt.cc index c5dc2e41fdd..64c03db734c 100755 --- a/storage/pbxt/src/pthread_xt.cc +++ b/storage/pbxt/src/pthread_xt.cc @@ -547,44 +547,23 @@ xtPublic void xt_p_init_threading(void) xtPublic int xt_p_set_low_priority(pthread_t thr) { - if (pth_min_priority == pth_max_priority) { - /* Under Linux the priority of normal (non-runtime) - * threads are set using the standard methods - * for setting process priority. - */ - - /* We could set who == 0 because it should have the same affect - * as using the PID. - */ - - /* -20 = highest, 20 = lowest */ -#ifdef SET_GLOBAL_PRIORITY - if (setpriority(PRIO_PROCESS, getpid(), 20) == -1) - return errno; -#endif - return 0; - } - return pth_set_priority(thr, pth_min_priority); + if (pth_min_priority != pth_max_priority) + return pth_set_priority(thr, pth_min_priority); + return 0; } xtPublic int xt_p_set_normal_priority(pthread_t thr) { - if (pth_min_priority == pth_max_priority) { - if (setpriority(PRIO_PROCESS, getpid(), 0) == -1) - return errno; - return 0; - } - return pth_set_priority(thr, pth_normal_priority); + if (pth_min_priority != pth_max_priority) + return pth_set_priority(thr, pth_normal_priority); + return 0; } xtPublic int xt_p_set_high_priority(pthread_t thr) { - if (pth_min_priority == pth_max_priority) { - if (setpriority(PRIO_PROCESS, getpid(), -20) == -1) - return errno; - return 0; - } - return pth_set_priority(thr, pth_max_priority); + if (pth_min_priority != pth_max_priority) + return pth_set_priority(thr, pth_max_priority); + return 0; } #ifdef DEBUG_LOCKING From 56eb6d7e69ecce856e2d54e2404157407cb7203b Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 13 Jun 2011 19:03:03 -0700 Subject: [PATCH 021/359] Fixed LP bug #794890. Changed the code that processing of multi-updates and multi-deletes with multitable views at the prepare stage. A proper solution would be: never to perform any transformations of views before and at the prepare stage. Yet it would require re-engineering of the code that checks privileges and updatability of views. Ultimately this re-engineering has to be done to provide a clean solution for INSERT/UPDATE/DELETE statements that use views. Fixed a valgrind problem in the function TABLE::use_index. --- mysql-test/r/derived_view.result | 25 +++++++++ mysql-test/t/derived_view.test | 24 +++++++++ sql/sql_base.cc | 15 ++++-- sql/sql_class.cc | 1 + sql/sql_class.h | 2 + sql/sql_delete.cc | 23 +++++--- sql/sql_derived.cc | 92 ++++++++++++++++++-------------- sql/sql_lex.cc | 30 +++++++++++ sql/sql_lex.h | 3 ++ sql/sql_prepare.cc | 13 +++-- sql/sql_update.cc | 8 ++- sql/table.cc | 2 +- 12 files changed, 180 insertions(+), 58 deletions(-) diff --git a/mysql-test/r/derived_view.result b/mysql-test/r/derived_view.result index 5e3fc1e8cf1..2b0ef412460 100644 --- a/mysql-test/r/derived_view.result +++ b/mysql-test/r/derived_view.result @@ -589,3 +589,28 @@ f1 f1 224 224 DROP VIEW v1; DROP TABLE t1,t2; +# +# LP bug #794890: abort failure on multi-update with view +# +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (20), (7); +CREATE TABLE t2 (a int); +INSERT INTO t2 VALUES (7), (9), (7); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT a FROM t1; +CREATE VIEW v2 AS SELECT t2.a FROM t2, v1 WHERE t2.a=t2.a; +UPDATE v2 SET a = 2; +SELECT * FROM t2; +a +2 +2 +2 +UPDATE t1,v2 SET t1.a = 3; +SELECT * FROM t1; +a +3 +3 +DELETE t1 FROM t1,v2; +SELECT * FROM t1; +a +DROP VIEW v1,v2; +DROP TABLE t1,t2; diff --git a/mysql-test/t/derived_view.test b/mysql-test/t/derived_view.test index 93c2eb48fcd..e5d5b0f797d 100644 --- a/mysql-test/t/derived_view.test +++ b/mysql-test/t/derived_view.test @@ -235,3 +235,27 @@ SELECT * FROM v1 JOIN t2 ON v1.f1 = t2.f1; DROP VIEW v1; DROP TABLE t1,t2; + +--echo # +--echo # LP bug #794890: abort failure on multi-update with view +--echo # + +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (20), (7); +CREATE TABLE t2 (a int); +INSERT INTO t2 VALUES (7), (9), (7); + +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT a FROM t1; + +CREATE VIEW v2 AS SELECT t2.a FROM t2, v1 WHERE t2.a=t2.a; +UPDATE v2 SET a = 2; +SELECT * FROM t2; + +UPDATE t1,v2 SET t1.a = 3; +SELECT * FROM t1; + +DELETE t1 FROM t1,v2; +SELECT * FROM t1; + +DROP VIEW v1,v2; +DROP TABLE t1,t2; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 136ae9cb9f0..dd1c89a8fb4 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -7789,9 +7789,18 @@ bool setup_tables(THD *thd, Name_resolution_context *context, if (select_lex->first_cond_optimization) { leaves.empty(); - select_lex->leaf_tables_exec.empty(); - make_leaves_list(leaves, tables, full_table_list, first_select_table); - + if (!select_lex->is_prep_leaf_list_saved) + { + make_leaves_list(leaves, tables, full_table_list, first_select_table); + select_lex->leaf_tables_exec.empty(); + } + else + { + List_iterator_fast ti(select_lex->leaf_tables_prep); + while ((table_list= ti++)) + leaves.push_back(table_list); + } + while ((table_list= ti++)) { TABLE *table= table_list->table; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 925a39afa33..d8aece52c53 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -814,6 +814,7 @@ THD::THD() memset(&invoker_user, 0, sizeof(invoker_user)); memset(&invoker_host, 0, sizeof(invoker_host)); prepare_derived_at_open= FALSE; + save_prep_leaf_list= FALSE; } diff --git a/sql/sql_class.h b/sql/sql_class.h index aec7839455e..152d65e9d9e 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1571,6 +1571,8 @@ public: bool prepare_derived_at_open; + bool save_prep_leaf_list; + #ifndef MYSQL_CLIENT int binlog_setup_trx_data(); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 6ace7ba8086..7f3ed2e705f 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -61,8 +61,9 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (open_and_lock_tables(thd, table_list)) DBUG_RETURN(TRUE); - if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) || - mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE)) + if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE)) DBUG_RETURN(TRUE); if (!table_list->updatable) @@ -550,7 +551,7 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array)) DBUG_RETURN(TRUE); - select_lex->fix_prepare_information(thd, conds, &fake_conds); + select_lex->fix_prepare_information(thd, conds, &fake_conds); DBUG_RETURN(FALSE); } @@ -586,10 +587,11 @@ int mysql_multi_delete_prepare(THD *thd) TABLE_LIST *target_tbl; DBUG_ENTER("mysql_multi_delete_prepare"); - TABLE_LIST *tables= lex->query_tables; - if (mysql_handle_derived(lex, DT_INIT) || - mysql_handle_list_of_derived(lex, tables, DT_MERGE_FOR_INSERT) || - mysql_handle_list_of_derived(lex, tables, DT_PREPARE)) + if (mysql_handle_derived(lex, DT_INIT)) + DBUG_RETURN(TRUE); + if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (mysql_handle_derived(lex, DT_PREPARE)) DBUG_RETURN(TRUE); /* setup_tables() need for VIEWs. JOIN::prepare() will not do it second @@ -616,7 +618,8 @@ int mysql_multi_delete_prepare(THD *thd) target_tbl= target_tbl->next_local) { - if (!(target_tbl->table= target_tbl->correspondent_table->table)) + target_tbl->table= target_tbl->correspondent_table->table; + if (target_tbl->correspondent_table->is_multitable()) { my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), target_tbl->correspondent_table->view_db.str, @@ -651,6 +654,10 @@ int mysql_multi_delete_prepare(THD *thd) with further calls to unique_table */ lex->select_lex.exclude_from_table_unique_test= FALSE; + + if (lex->select_lex.save_prep_leaf_tables(thd)) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); } diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 9bfa49c5f2d..7474566b184 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -85,15 +85,18 @@ mysql_handle_derived(LEX *lex, uint phases) cursor && !res; cursor= cursor->next_local) { + if (!cursor->is_view_or_derived() && phases == DT_MERGE_FOR_INSERT) + continue; uint8 allowed_phases= (cursor->is_merged_derived() ? DT_PHASES_MERGE : - DT_PHASES_MATERIALIZE); + DT_PHASES_MATERIALIZE | DT_MERGE_FOR_INSERT); /* Skip derived tables to which the phase isn't applicable. TODO: mark derived at the parse time, later set it's type (merged or materialized) */ if ((phase_flag != DT_PREPARE && !(allowed_phases & phase_flag)) || - (cursor->merged_for_insert && phase_flag != DT_REINIT)) + (cursor->merged_for_insert && phase_flag != DT_REINIT && + phase_flag != DT_PREPARE)) continue; res= (*processors[phase])(lex->thd, lex, cursor); } @@ -345,41 +348,43 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived) arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test derived->merged= TRUE; - /* - Check whether there is enough free bits in table map to merge subquery. - If not - materialize it. This check isn't cached so when there is a big - and small subqueries, and the bigger one can't be merged it wouldn't - block the smaller one. - */ - if (parent_lex->get_free_table_map(&map, &tablenr)) - { - /* There is no enough table bits, fall back to materialization. */ - derived->change_refs_to_fields(); - derived->set_materialized_derived(); - goto exit_merge; - } - - if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES) - { - /* There is no enough table bits, fall back to materialization. */ - derived->change_refs_to_fields(); - derived->set_materialized_derived(); - goto exit_merge; - } - - if (dt_select->options & OPTION_SCHEMA_TABLE) - parent_lex->options |= OPTION_SCHEMA_TABLE; - - parent_lex->cond_count+= dt_select->cond_count; - - if (!derived->get_unit()->prepared) - { - dt_select->leaf_tables.empty(); - make_leaves_list(dt_select->leaf_tables, derived, TRUE, 0); - } if (!derived->merged_for_insert) - { derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN)); + { + /* + Check whether there is enough free bits in table map to merge subquery. + If not - materialize it. This check isn't cached so when there is a big + and small subqueries, and the bigger one can't be merged it wouldn't + block the smaller one. + */ + if (parent_lex->get_free_table_map(&map, &tablenr)) + { + /* There is no enough table bits, fall back to materialization. */ + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + goto exit_merge; + } + + if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES) + { + /* There is no enough table bits, fall back to materialization. */ + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + goto exit_merge; + } + + if (dt_select->options & OPTION_SCHEMA_TABLE) + parent_lex->options |= OPTION_SCHEMA_TABLE; + + parent_lex->cond_count+= dt_select->cond_count; + + if (!derived->get_unit()->prepared) + { + dt_select->leaf_tables.empty(); + make_leaves_list(dt_select->leaf_tables, derived, TRUE, 0); + } + + derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN)); if (!derived->nested_join) { res= TRUE; @@ -467,14 +472,16 @@ bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived) if (derived->merged_for_insert) return FALSE; - /* It's a target view for an INSERT, create field translation only. */ - if (!derived->updatable || derived->is_materialized_derived()) + if (derived->is_materialized_derived()) { - bool res= derived->create_field_translation(thd); + bool res= mysql_derived_prepare(thd, lex, derived); + derived->select_lex->leaf_tables.push_back(derived); return res; } if (!derived->is_multitable()) { + if (!derived->updatable) + return derived->create_field_translation(thd); TABLE_LIST *tl=((TABLE_LIST*)dt_select->table_list.first); TABLE *table= tl->table; /* preserve old map & tablenr. */ @@ -504,6 +511,9 @@ bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived) } else { + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command != SQLCOM_DELETE_MULTI) + thd->save_prep_leaf_list= TRUE; if (!derived->merged_for_insert && mysql_derived_merge(thd, lex, derived)) return TRUE; } @@ -609,7 +619,11 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived) bool res= FALSE; // Skip already prepared views/DT - if (!unit || unit->prepared || derived->merged_for_insert) + if (!unit || unit->prepared || + (derived->merged_for_insert && + !(derived->is_multitable() && + (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI)))) DBUG_RETURN(FALSE); Query_arena *arena= thd->stmt_arena, backup; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index d3613d0d660..27562238734 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1604,6 +1604,7 @@ void st_select_lex::init_query() top_join_list.empty(); join_list= &top_join_list; embedding= 0; + leaf_tables_prep.empty(); leaf_tables.empty(); item_list.empty(); join= 0; @@ -1672,6 +1673,7 @@ void st_select_lex::init_select() cond_value= having_value= Item::COND_UNDEF; inner_refs_list.empty(); full_group_by_flag= 0; + is_prep_leaf_list_saved= FALSE; insert_tables= 0; merged_into= 0; } @@ -3585,6 +3587,7 @@ void SELECT_LEX::mark_const_derived(bool empty) } } + bool st_select_lex::save_leaf_tables(THD *thd) { Query_arena *arena= thd->stmt_arena, backup; @@ -3609,6 +3612,33 @@ bool st_select_lex::save_leaf_tables(THD *thd) } +bool st_select_lex::save_prep_leaf_tables(THD *thd) +{ + if (!thd->save_prep_leaf_list) + return 0; + + Query_arena *arena= thd->stmt_arena, backup; + if (arena->is_conventional()) + arena= 0; + else + thd->set_n_backup_active_arena(arena, &backup); + + List_iterator_fast li(leaf_tables); + TABLE_LIST *table; + while ((table= li++)) + { + if (leaf_tables_prep.push_back(table)) + return 1; + } + thd->lex->select_lex.is_prep_leaf_list_saved= TRUE; + thd->save_prep_leaf_list= FALSE; + if (arena) + thd->restore_active_arena(arena, &backup); + + return 0; +} + + /** A routine used by the parser to decide whether we are specifying a full partitioning or if only partitions to add or to split. diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 5a8b5a5f361..57eeb963b89 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -639,6 +639,8 @@ public: */ List leaf_tables; List leaf_tables_exec; + List leaf_tables_prep; + bool is_prep_leaf_list_saved; uint insert_tables; st_select_lex *merged_into; /* select which this select is merged into */ /* (not 0 only for views/derived tables) */ @@ -899,6 +901,7 @@ public: void mark_const_derived(bool empty); bool save_leaf_tables(THD *thd); + bool save_prep_leaf_tables(THD *thd); private: /* current index hint kind. used in filling up index_hints */ diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index ece4b8a1014..5b5ed004006 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1269,8 +1269,9 @@ static int mysql_test_update(Prepared_statement *stmt, thd->fill_derived_tables() is false here for sure (because it is preparation of PS, so we even do not check it). */ - if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT) || - table_list->handle_derived(thd->lex, DT_PREPARE)) + if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + goto error; + if (table_list->handle_derived(thd->lex, DT_PREPARE)) goto error; if (!table_list->updatable) @@ -1340,9 +1341,11 @@ static bool mysql_test_delete(Prepared_statement *stmt, open_tables(thd, &table_list, &table_count, 0)) goto error; - if (mysql_handle_derived(thd->lex, DT_INIT) || - mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) || - mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE)) + if (mysql_handle_derived(thd->lex, DT_INIT)) + goto error; + if (mysql_handle_derived(thd->lex, DT_MERGE_FOR_INSERT)) + goto error; + if (mysql_handle_derived(thd->lex, DT_PREPARE)) goto error; if (!table_list->updatable) diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 4821fc2bd8f..0bfbcf2b23f 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -1038,8 +1038,9 @@ reopen_tables: call in setup_tables()). */ //We need to merge for insert prior to prepare. - if (mysql_handle_list_of_derived(lex, table_list, DT_MERGE_FOR_INSERT)) - DBUG_RETURN(1); + if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT)) + DBUG_RETURN(TRUE); + if (mysql_handle_derived(lex, DT_PREPARE)) DBUG_RETURN(TRUE); @@ -1237,6 +1238,9 @@ reopen_tables: further check in multi_update::prepare whether to use record cache. */ lex->select_lex.exclude_from_table_unique_test= FALSE; + + if (lex->select_lex.save_prep_leaf_tables(thd)) + DBUG_RETURN(TRUE); DBUG_RETURN (FALSE); } diff --git a/sql/table.cc b/sql/table.cc index 8427ca6e112..512214b155e 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5372,7 +5372,7 @@ void TABLE::use_index(int key_to_save) DBUG_ASSERT(!created && key_to_save < (int)s->keys); if (key_to_save >= 0) /* Save the given key. */ - memcpy(key_info, key_info + key_to_save, sizeof(KEY)); + memmove(key_info, key_info + key_to_save, sizeof(KEY)); else /* Drop all keys; */ i= 0; From 565b7feea86f1a192f809be41abd03c275ddb126 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 13 Jun 2011 22:18:40 -0700 Subject: [PATCH 022/359] Fixed a typo in the patch for bug 794890. --- sql/sql_derived.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 7474566b184..29cb3ede15d 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -512,7 +512,7 @@ bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived) else { if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || - thd->lex->sql_command != SQLCOM_DELETE_MULTI) + thd->lex->sql_command == SQLCOM_DELETE_MULTI) thd->save_prep_leaf_list= TRUE; if (!derived->merged_for_insert && mysql_derived_merge(thd, lex, derived)) return TRUE; From 2c243b3199dd220a88476648b02aacd059968304 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 15 Jun 2011 19:44:00 +0200 Subject: [PATCH 023/359] fix "./configure --with-debug" builds (without CFLAGS=-DSAFEMALLOC). --- storage/heap/hp_test2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c index bf06cf03035..67471e93a7a 100644 --- a/storage/heap/hp_test2.c +++ b/storage/heap/hp_test2.c @@ -22,7 +22,7 @@ #undef DBUG_OFF #endif #ifndef SAFEMALLOC -#define SAFEMALLOC +#define SAFEMALLOC 1 #endif #include "heapdef.h" /* Because of hp_find_block */ From 018614e2b348598f4518e11156fb44c388d2bf32 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 15 Jun 2011 20:30:10 +0200 Subject: [PATCH 024/359] ./mtr --suite funcs_1 --ps-protocol --- .../funcs_1/r/processlist_priv_ps.result | 164 +++++----- .../suite/funcs_1/r/processlist_val_ps.result | 284 ++++++++++++------ 2 files changed, 276 insertions(+), 172 deletions(-) diff --git a/mysql-test/suite/funcs_1/r/processlist_priv_ps.result b/mysql-test/suite/funcs_1/r/processlist_priv_ps.result index c5aabfe315c..c05c9549334 100644 --- a/mysql-test/suite/funcs_1/r/processlist_priv_ps.result +++ b/mysql-test/suite/funcs_1/r/processlist_priv_ps.result @@ -29,28 +29,29 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `COMMAND` varchar(16) NOT NULL DEFAULT '', `TIME` int(7) NOT NULL DEFAULT '0', `STATE` varchar(64) DEFAULT NULL, - `INFO` longtext + `INFO` longtext, + `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000' ) DEFAULT CHARSET=utf8 SHOW processlist; Id User Host db Command Time State Info ID root HOST_NAME information_schema Query TIME NULL SHOW processlist ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL SELECT * FROM processlist ORDER BY id; -ID USER HOST DB COMMAND TIME STATE INFO -ID root HOST_NAME information_schema Execute TIME executing SELECT * FROM processlist ORDER BY id -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO FROM processlist ORDER BY id; -ID USER HOST DB COMMAND TIME STATE INFO -ID root HOST_NAME information_schema Execute TIME executing SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO FROM processlist ORDER BY id -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID root HOST_NAME information_schema Execute TIME executing SELECT * FROM processlist ORDER BY id TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS FROM processlist ORDER BY id; +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID root HOST_NAME information_schema Execute TIME executing SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS FROM processlist ORDER BY id TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS CREATE TEMPORARY TABLE test.t_processlist AS SELECT * FROM processlist; UPDATE test.t_processlist SET user='horst' WHERE id=1 ; INSERT INTO processlist SELECT * FROM test.t_processlist; ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema' DROP TABLE test.t_processlist; -CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO) AS SELECT * FROM processlist WITH CHECK OPTION; +CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS) AS SELECT * FROM processlist WITH CHECK OPTION; ERROR HY000: CHECK OPTION on non-updatable view 'test.v_processlist' -CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO) AS SELECT * FROM processlist; +CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS) AS SELECT * FROM processlist; UPDATE test.v_processlist SET TIME=NOW() WHERE id = 1; ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema' DROP VIEW test.v_processlist; @@ -99,25 +100,26 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `COMMAND` varchar(16) NOT NULL DEFAULT '', `TIME` int(7) NOT NULL DEFAULT '0', `STATE` varchar(64) DEFAULT NULL, - `INFO` longtext + `INFO` longtext, + `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000' ) DEFAULT CHARSET=utf8 SHOW processlist; Id User Host db Command Time State Info ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM processlist ORDER BY id; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM processlist ORDER BY id -SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO FROM processlist ORDER BY id; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO FROM processlist ORDER BY id +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM processlist ORDER BY id TIME_MS +SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS FROM processlist ORDER BY id; +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS FROM processlist ORDER BY id TIME_MS CREATE TEMPORARY TABLE test.t_processlist AS SELECT * FROM processlist; UPDATE test.t_processlist SET user='horst' WHERE id=1 ; INSERT INTO processlist SELECT * FROM test.t_processlist; ERROR 42000: Access denied for user 'ddicttestuser1'@'localhost' to database 'information_schema' DROP TABLE test.t_processlist; -CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO) AS SELECT * FROM processlist WITH CHECK OPTION; +CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS) AS SELECT * FROM processlist WITH CHECK OPTION; ERROR HY000: CHECK OPTION on non-updatable view 'test.v_processlist' -CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO) AS SELECT * FROM processlist; +CREATE VIEW test.v_processlist (ID, USER, HOST, DB, COMMAND, TIME, STATE, INFO, TIME_MS) AS SELECT * FROM processlist; UPDATE test.v_processlist SET TIME=NOW() WHERE id = 1; ERROR 42000: Access denied for user 'ddicttestuser1'@'localhost' to database 'information_schema' DROP VIEW test.v_processlist; @@ -170,8 +172,8 @@ SHOW processlist; Id User Host db Command Time State Info ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS #################################################################################### 4.2 New connection con101 (ddicttestuser1 with PROCESS privilege) SHOW/SELECT shows all processes/threads. @@ -185,10 +187,10 @@ ID root HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID root HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID root HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 5 Grant PROCESS privilege to anonymous user. connection default (user=root) @@ -209,11 +211,11 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID root HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID root HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 6 Revoke PROCESS privilege from ddicttestuser1 connection default (user=root) @@ -233,10 +235,10 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 7 Revoke PROCESS privilege from anonymous user connection default (user=root) @@ -251,9 +253,9 @@ SHOW GRANTS FOR ''@'localhost'; Grants for @localhost GRANT USAGE ON *.* TO ''@'localhost' SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 8 Grant SUPER (does not imply PROCESS) privilege to ddicttestuser1 connection default (user=root) @@ -273,11 +275,11 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 9 Revoke SUPER privilege from user ddicttestuser1 connection default (user=root) @@ -299,12 +301,12 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 10 Grant SUPER privilege with grant option to user ddicttestuser1. connection default (user=root) @@ -353,18 +355,18 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser2 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser2 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID root HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser2 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID root HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 11 User ddicttestuser1 revokes PROCESS privilege from user ddicttestuser2 connection ddicttestuser1; @@ -382,9 +384,9 @@ Id User Host db Command Time State Info ID ddicttestuser2 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser2 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser2 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser2 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser2 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser2 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 11.2 Revoke SUPER,PROCESS,GRANT OPTION privilege from user ddicttestuser1 connection default (user=root) @@ -411,15 +413,15 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 12 Revoke the SELECT privilege from user ddicttestuser1 connection default (user=root) @@ -447,16 +449,16 @@ ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL ID ddicttestuser1 HOST_NAME information_schema Query TIME NULL SHOW processlist SELECT * FROM information_schema.processlist; -ID USER HOST DB COMMAND TIME STATE INFO -ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL -ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Execute TIME executing SELECT * FROM information_schema.processlist TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS +ID ddicttestuser1 HOST_NAME information_schema Sleep TIME NULL TIME_MS #################################################################################### 12.2 Revoke only the SELECT privilege on the information_schema from ddicttestuser1. connection default (user=root) diff --git a/mysql-test/suite/funcs_1/r/processlist_val_ps.result b/mysql-test/suite/funcs_1/r/processlist_val_ps.result index 448c68eadb8..86f290daea7 100644 --- a/mysql-test/suite/funcs_1/r/processlist_val_ps.result +++ b/mysql-test/suite/funcs_1/r/processlist_val_ps.result @@ -19,140 +19,242 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `COMMAND` varchar(16) NOT NULL DEFAULT '', `TIME` int(7) NOT NULL DEFAULT '0', `STATE` varchar(64) DEFAULT NULL, - `INFO` longtext + `INFO` longtext, + `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000' ) DEFAULT CHARSET=utf8 -SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST; -COUNT(*) -1 -USE test; +# Ensure that the information about the own connection is correct. +#-------------------------------------------------------------------------- + SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST; -ID USER HOST DB COMMAND TIME STATE INFO - root localhost test Execute 0 executing SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST +ID USER HOST DB COMMAND TIME STATE INFO TIME_MS + root test Execute