From 447e23392451f1c91c78a7465fb1680e8f21384b Mon Sep 17 00:00:00 2001 From: Balasubramanian Kandasamy Date: Mon, 27 Jun 2016 12:48:57 +0530 Subject: [PATCH 01/96] Raise version number after cloning 5.5.51 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index db9d497c141..acabf9b42d0 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=51 +MYSQL_VERSION_PATCH=52 MYSQL_VERSION_EXTRA= From 6986645c7924ef3292e233d9365520b29eb8fa0d Mon Sep 17 00:00:00 2001 From: Christopher Powers Date: Thu, 30 Jun 2016 20:42:29 +0200 Subject: [PATCH 02/96] Bug#14111584 PB2: PERFSCHEMA.AGGREGATE FAILS ON PB2 SPORADICALLY Permanently removed test case perfschema.aggregate. The Performance Schema is generally lock-free, allowing for race conditions that might arise from multi-threaded operation which occasionally results in temporary and/or minor variances when aggregating statistics. This test needs to be redesigned to accommodate such variances. 
--- .../suite/perfschema/r/aggregate.result | 102 ---------- mysql-test/suite/perfschema/t/aggregate.test | 174 ------------------ 2 files changed, 276 deletions(-) delete mode 100644 mysql-test/suite/perfschema/r/aggregate.result delete mode 100644 mysql-test/suite/perfschema/t/aggregate.test diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result deleted file mode 100644 index edc7ce0bcca..00000000000 --- a/mysql-test/suite/perfschema/r/aggregate.result +++ /dev/null @@ -1,102 +0,0 @@ -"General cleanup" -drop table if exists t1; -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -create table t1 ( -id INT PRIMARY KEY, -b CHAR(100) DEFAULT 'initial value') -ENGINE=MyISAM; -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); -update performance_schema.setup_instruments SET enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -set @dump_all=FALSE; -"Verifying file aggregate consistency" -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN 
performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -"Verifying waits aggregate consistency (instance)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME 
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT) -"Verifying waits aggregate consistency (thread)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT) -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -drop table test.t1; diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test deleted file mode 100644 index 91794f04346..00000000000 --- a/mysql-test/suite/perfschema/t/aggregate.test +++ /dev/null @@ -1,174 +0,0 @@ -# Tests for PERFORMANCE_SCHEMA -# Verify that statistics aggregated by different criteria are consistent. 
- ---source include/not_embedded.inc ---source include/have_perfschema.inc - ---echo "General cleanup" - ---disable_warnings -drop table if exists t1; ---enable_warnings - -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; - -# Cleanup statistics -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; - -# Start recording data -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - - -create table t1 ( - id INT PRIMARY KEY, - b CHAR(100) DEFAULT 'initial value') - ENGINE=MyISAM; - -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); - -# Stop recording data, so the select below don't add noise. -update performance_schema.setup_instruments SET enabled = 'NO'; -# Disable all consumers, for long standing waits -update performance_schema.setup_consumers set enabled = 'NO'; - -# Helper to debug -set @dump_all=FALSE; - -# Note that in general: -# - COUNT/SUM/MAX(file_summary_by_event_name) >= -# COUNT/SUM/MAX(file_summary_by_instance). -# - MIN(file_summary_by_event_name) <= -# MIN(file_summary_by_instance). -# There will be equality only when file instances are not removed, -# aka when a file is not deleted from the file system, -# because doing so removes a row in file_summary_by_instance. 
- -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_instance) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_instance) -# There will be equality only when an instrument instance -# is not removed, which is next to impossible to predictably guarantee -# in the server. -# For example, a MyISAM table removed from the table cache -# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock. -# Another example, a thread terminating will cause a mysql_mutex_destroy -# on sql/LOCK_delete -# Both cause a row to be deleted from events_waits_summary_by_instance. - -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_thread_by_event_name) -# There will be equality only when no thread is removed, -# that is if no thread disconnects, or no sub thread (for example insert -# delayed) ever completes. -# A thread completing will cause rows in -# events_waits_summary_by_thread_by_event_name to be removed. - ---echo "Verifying file aggregate consistency" - -# Since the code generating the load in this test does: -# - create table -# - insert -# - does not cause temporary tables to be used -# we can test for equality here for file aggregates. - -# If any of these queries returns data, the test failed. 
- -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (instance)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM 
performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (thread)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; - - -# Cleanup - -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - -drop table test.t1; From 07a33cdcef16b21c5d9bb5f0c538066e24eb8dc0 Mon Sep 17 00:00:00 2001 From: Kailasnath Nagarkar Date: Fri, 1 Jul 2016 12:01:27 +0530 Subject: [PATCH 03/96] Bug #23296299 : HANDLE_FATAL_SIGNAL (SIG=11) IN MY_TOSORT_UTF32 This patch is specific for mysql-5.5 ISSUE: When a charater that is larger than possible to handle is passed to function my_tosort_utf32(), it results in segmentation fault. In the scenario mentioned in the bug AES_ENCRYPT function is used which returns large value. 
This value is further passed to my_tosort_utf32 function. This causes to cross array bound for array uni_plane, resulting in segment violation. SOLUTION: This issue has got addressed in 5.6 onward releases through worklog 2673. The fix is similar backport of that. Check for maximum character before accessing the array uni_plane. In addition to function my_tosort_utf32, the same potential problem is also present in functions my_tolower_utf16, my_toupper_utf16, my_tosort_utf16, my_tolower_utf32, my_toupper_utf32, my_tosort_unicode, my_tolower_utf8mb4 and my_toupper_utf8mb4. Fixed these functions as well. --- include/m_ctype.h | 4 ++-- strings/ctype-ucs2.c | 14 +++++++------- strings/ctype-utf8.c | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/m_ctype.h b/include/m_ctype.h index 81096f60c78..87b1e529f65 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,11 +33,11 @@ extern "C" { #define MY_CS_TO_UPPER_TABLE_SIZE 256 #define MY_CS_SORT_ORDER_TABLE_SIZE 256 #define MY_CS_TO_UNI_TABLE_SIZE 256 - #define CHARSET_DIR "charsets/" #define my_wc_t ulong +#define MY_CS_MAX_CHAR 0xFFFF #define MY_CS_REPLACEMENT_CHARACTER 0xFFFD /* diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 37fd1b5349f..f1beff82a40 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -1099,7 +1099,7 @@ static inline void my_tolower_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -1108,7 +1108,7 @@ static inline void my_toupper_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } @@ -1117,7 +1117,7 @@ static inline void my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256) + if (page < 256 && *wc <= MY_CS_MAX_CHAR) { if (uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].sort; @@ -1728,7 +1728,7 @@ static inline void my_tolower_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -1737,7 +1737,7 @@ static inline void my_toupper_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } @@ -1746,7 +1746,7 @@ static inline void my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256) + if (page < 256 && *wc <= MY_CS_MAX_CHAR) { if (uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].sort; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 52e05f17d61..33e5703ffd9 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -1941,7 +1941,7 @@ static inline void my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256) + if (page < 256 && *wc <= MY_CS_MAX_CHAR) { if (uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].sort; @@ -5023,7 +5023,7 @@ static inline void my_tolower_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -5032,7 +5032,7 @@ static inline void my_toupper_utf8mb4(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { int page= *wc >> 8; - if (page < 256 && uni_plane[page]) + if (page < 256 && *wc <= MY_CS_MAX_CHAR && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } From 09784e244bfbb16d3a1b54d1018995984ac8c121 Mon Sep 17 00:00:00 2001 From: Balasubramanian Kandasamy Date: Tue, 5 Jul 2016 17:08:37 +0530 Subject: [PATCH 04/96] Bug#23736787 - YUM UPDATE FAIL FROM 5.5.51(COMUNITY/COMMERCIAL) TO 5.6.32(COMUNITY/COMMERCIAL) Remove mysql_config from client sub-package --- packaging/rpm-oel/mysql.spec.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packaging/rpm-oel/mysql.spec.in b/packaging/rpm-oel/mysql.spec.in index 29957d98ed0..409c325b675 100644 --- a/packaging/rpm-oel/mysql.spec.in +++ b/packaging/rpm-oel/mysql.spec.in @@ -835,8 +835,6 @@ fi %attr(755, root, root) %{_bindir}/mysqlimport %attr(755, root, root) %{_bindir}/mysqlshow %attr(755, root, root) %{_bindir}/mysqlslap -%attr(755, root, root) %{_bindir}/mysql_config -%attr(755, root, root) %{_bindir}/mysql_config-%{__isa_bits} %attr(644, root, root) %{_mandir}/man1/msql2mysql.1* %attr(644, root, root) %{_mandir}/man1/mysql.1* @@ -918,6 +916,9 @@ fi %endif %changelog +* Tue Jul 05 2016 Balasubramanian Kandasamy - 5.5.51-1 +- Remove mysql_config from client 
subpackage + * Tue Sep 29 2015 Balasubramanian Kandasamy - 5.5.47-1 - Added conflicts to mysql-connector-c-shared dependencies From 54e887b2fe24056b633957e8f9aff9d7f50b5089 Mon Sep 17 00:00:00 2001 From: Chaithra Gopalareddy Date: Tue, 19 Jul 2016 08:03:09 +0530 Subject: [PATCH 05/96] Bug#23280059: ITEM_ROW::ILLEGAL_METHOD_CALL(CONST CHAR*): ASSERTION `0' FAILED ON SELECT AREA Problem: Optimizer tries to get the points to calculate area without checking the return value of uint4korr for 0 "points". As a result server exits. Solution: Check the return value from uint4korr(). --- sql/spatial.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/spatial.cc b/sql/spatial.cc index 7aab91b5a5b..7643b3a649d 100644 --- a/sql/spatial.cc +++ b/sql/spatial.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -849,6 +849,8 @@ int Gis_polygon::area(double *ar, const char **end_of_data) const if (no_data(data, 4)) return 1; n_points= uint4korr(data); + if (n_points == 0) + return 1; if (not_enough_points(data, n_points)) return 1; get_point(&prev_x, &prev_y, data+4); From ac460e584d9706e02ce3dcb86bd994276672b267 Mon Sep 17 00:00:00 2001 From: Sreeharsha Ramanavarapu Date: Fri, 22 Jul 2016 07:33:43 +0530 Subject: [PATCH 06/96] Bug #23280699: MYSQLD GOT SIGNAL 11 IN IS_NULL ON SELECT FROM I_S Issue: ------ There is a difference in the field type created when the following DDLs are used: 1) CREATE TABLE t0 AS SELECT NULL; 2) CREATE TABLE t0 AS SELECT GREATEST(NULL,NULL); The first statement creates field of type Field_string and the second one creates a field of type Field_null. This creates a problem when the query mentioned in this bug is used. 
Since the null_ptr is calculated differently for Field_null. Solution: --------- When there is a function returning null in the select list as mentioned above, the field should be of type Field_string. This was fixed in 5.6+ as part of Bug#14021323. This is a backport to mysql-5.5. An incorrect comment in innodb_bug54044.test has been corrected in all versions. --- mysql-test/suite/innodb/r/innodb_bug54044.result | 14 ++++++++++---- mysql-test/suite/innodb/t/innodb_bug54044.test | 11 +++++------ sql/item.cc | 5 +---- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result index 350c500cb9b..29b0127f20b 100644 --- a/mysql-test/suite/innodb/r/innodb_bug54044.result +++ b/mysql-test/suite/innodb/r/innodb_bug54044.result @@ -6,7 +6,13 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` ( `IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE table_54044; -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL); -ERROR HY000: Can't create table 'test.tmp' (errno: -1) -CREATE TABLE tmp ENGINE = INNODB AS SELECT GREATEST(NULL, NULL); -ERROR HY000: Can't create table 'test.tmp' (errno: -1) +CREATE TABLE tmp ENGINE = INNODB +AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +SHOW CREATE TABLE tmp; +Table Create Table +tmp CREATE TABLE `tmp` ( + `COALESCE(NULL, NULL, NULL)` binary(0) DEFAULT NULL, + `GREATEST(NULL, NULL)` binary(0) DEFAULT NULL, + `NULL` binary(0) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +DROP TABLE tmp; diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test index 0bbd7da0065..cfc6f3c3f0a 100644 --- a/mysql-test/suite/innodb/t/innodb_bug54044.test +++ b/mysql-test/suite/innodb/t/innodb_bug54044.test @@ -10,10 +10,9 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB SHOW CREATE TABLE table_54044; DROP 
TABLE table_54044; -# These 'create table' operations should fail because of -# using NULL datatype +# This 'create table' should pass since it uses a Field_string of size 0. ---error ER_CANT_CREATE_TABLE -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL); ---error ER_CANT_CREATE_TABLE -CREATE TABLE tmp ENGINE = INNODB AS SELECT GREATEST(NULL, NULL); +CREATE TABLE tmp ENGINE = INNODB + AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +SHOW CREATE TABLE tmp; +DROP TABLE tmp; diff --git a/sql/item.cc b/sql/item.cc index 1541314ec97..34157c33cf4 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -5360,10 +5360,6 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length) field= new Field_double((uchar*) 0, max_length, null_ptr, 0, Field::NONE, name, decimals, 0, unsigned_flag); break; - case MYSQL_TYPE_NULL: - field= new Field_null((uchar*) 0, max_length, Field::NONE, - name, &my_charset_bin); - break; case MYSQL_TYPE_INT24: field= new Field_medium((uchar*) 0, max_length, null_ptr, 0, Field::NONE, name, 0, unsigned_flag); @@ -5394,6 +5390,7 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length) DBUG_ASSERT(0); /* If something goes awfully wrong, it's better to get a string than die */ case MYSQL_TYPE_STRING: + case MYSQL_TYPE_NULL: if (fixed_length && max_length < CONVERT_IF_BIGGER_TO_BLOB) { field= new Field_string(max_length, maybe_null, name, From fd31eea949e5cbfccb97715bec62f6c63ece6010 Mon Sep 17 00:00:00 2001 From: Arun Kuruvila Date: Fri, 22 Jul 2016 13:15:32 +0530 Subject: [PATCH 07/96] Bug #23295288: HANDLE_FATAL_SIGNAL (SIG=11) IN GET_SERVER_FROM_TABLE_TO_CACHE Description:- Server received SIG11 in the function, "get_server_from_table_to_cache()". Analysis:- Defining a server with a blank name is not handled properly. Fix:- Modified "get_server_from_table_to_cache()" to take care of blank server name. 
--- sql/sql_yacc.yy | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 16ed61334c6..b8ddc8bd49f 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2148,6 +2148,11 @@ server_def: ident_or_text OPTIONS_SYM '(' server_options_list ')' { + if ($2.length == 0) + { + my_error(ER_WRONG_VALUE, MYF(0), "server name", ""); + MYSQL_YYABORT; + } Lex->server_options.server_name= $2.str; Lex->server_options.server_name_length= $2.length; Lex->server_options.scheme= $6.str; From 2674cf91c5dad871f59704685e77e76ba8f462cd Mon Sep 17 00:00:00 2001 From: Thayumanavar S Date: Mon, 25 Jul 2016 06:43:16 +0100 Subject: [PATCH 08/96] BUG#23703568 - IS CLIENT LIBRARY SUPPOSED TO RETRY EINTR INDEFINITELY OR NOT Commit#ebd24626ca38e7fa1e3da2acdcf88540be70fabe obsoleted the THREAD and THREAD_SAFE_CLIENT preprocessor symbols. This is not removed in the sql/net_serv.cc thereby the code that retries on EINTR became dead code. Remove the THREAD_SAFE_CLIENT preprocessor directive form sql/net_serv.cc. Also check errno for EINTR only if there is an error in preceding read call. --- sql/net_serv.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 8b44c7d443f..9c0c84bb292 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -676,13 +676,13 @@ net_real_write(NET *net,const uchar *packet, size_t len) my_progname); #endif /* EXTRA_DEBUG */ } -#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER) +#ifndef MYSQL_SERVER if (vio_errno(net->vio) == SOCKET_EINTR) { DBUG_PRINT("warning",("Interrupted write. Retrying...")); continue; } -#endif /* defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER) */ +#endif /* !defined(MYSQL_SERVER) */ net->error= 2; /* Close socket */ net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED : ER_NET_ERROR_ON_WRITE); @@ -887,8 +887,9 @@ my_real_read(NET *net, size_t *complen) my_progname,vio_errno(net->vio)); #endif /* EXTRA_DEBUG */ } -#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER) - if (vio_errno(net->vio) == SOCKET_EINTR) +#ifndef MYSQL_SERVER + if (static_cast(length) < 0 && + vio_errno(net->vio) == SOCKET_EINTR) { DBUG_PRINT("warning",("Interrupted read. Retrying...")); continue; From a63a250d40be0254c6a5633329a6b0577eaee74c Mon Sep 17 00:00:00 2001 From: Neha Kumari Date: Mon, 25 Jul 2016 20:34:20 +0530 Subject: [PATCH 09/96] BUG#23509275 :DBUG_PRINT in THD::decide_logging_format prints incorrectly, access out-of-bound Problem: In debug builds, there is a chance that an out-of-bounds read is performed when tables are locked in LTM_PRELOCKED_UNDER_LOCK_TABLES mode. It can happen because the debug code uses enum values as index for an array of mode descriptions, but it only takes into consideration 3 out of 4 of the enum values. Fix: This patch fixes it by implementing a getter for the enum which returns a string representation of the enum, effectively removing the out-of-bounds read. Moreover, it also fixes the lock mode descriptions that would be print out in debug builds. 
--- sql/sql_class.cc | 32 ++++++++++++++++++++++---------- sql/sql_class.h | 11 +++++++++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 4711009d7cd..0696021cfc0 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4246,6 +4246,25 @@ has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables) return 0; } +#ifndef DBUG_OFF +const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode) +{ + switch (locked_tables_mode) + { + case LTM_NONE: + return "LTM_NONE"; + case LTM_LOCK_TABLES: + return "LTM_LOCK_TABLES"; + case LTM_PRELOCKED: + return "LTM_PRELOCKED"; + case LTM_PRELOCKED_UNDER_LOCK_TABLES: + return "LTM_PRELOCKED_UNDER_LOCK_TABLES"; + default: + return "Unknown table lock mode"; + } +} +#endif + /** Decide on logging format to use for the statement and issue errors or warnings as needed. The decision depends on the following @@ -4397,15 +4416,8 @@ int THD::decide_logging_format(TABLE_LIST *tables) TABLE* prev_access_table= NULL; #ifndef DBUG_OFF - { - static const char *prelocked_mode_name[] = { - "NON_PRELOCKED", - "PRELOCKED", - "PRELOCKED_UNDER_LOCK_TABLES", - }; - DBUG_PRINT("debug", ("prelocked_mode: %s", - prelocked_mode_name[locked_tables_mode])); - } + DBUG_PRINT("debug", ("prelocked_mode: %s", + get_locked_tables_mode_name(locked_tables_mode))); #endif if (variables.binlog_format != BINLOG_FORMAT_ROW && tables) diff --git a/sql/sql_class.h b/sql/sql_class.h index 0df8c70e184..dcc7458ee50 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -963,6 +963,8 @@ typedef I_List Item_change_list; /** Type of locked tables mode. 
See comment for THD::locked_tables_mode for complete description. + While adding new enum values add them to the getter method for this enum + declared below and defined in sql_class.cc as well. */ enum enum_locked_tables_mode @@ -973,6 +975,15 @@ enum enum_locked_tables_mode LTM_PRELOCKED_UNDER_LOCK_TABLES }; +#ifndef DBUG_OFF +/** + Getter for the enum enum_locked_tables_mode + @param locked_tables_mode enum for types of locked tables mode + + @return The string represantation of that enum value +*/ +const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode); +#endif /** Class that holds information about tables which were opened and locked From 8bb95e9a974b5eee44764698fafc56279f8b7691 Mon Sep 17 00:00:00 2001 From: Sreeharsha Ramanavarapu Date: Wed, 3 Aug 2016 09:58:36 +0530 Subject: [PATCH 10/96] Bug #24380263: INCORRECT BEHAVIOR WITH PARAMETER AND DERIVED TABLE IN JOIN ISSUE: ------ This problem occurs under the following conditions: 1) A parameter is used in the select-list of a derived table. 2) The derived table is part of a JOIN. SOLUTION: --------- When a derived table is materialized, a temporary table is created. This temporary table creates a field each for the items in the select-list of the derived table. This set of fields is later used to setup the join. Currently no field is created in the temporary table if a parameter is used in the select-list. Create a field for the parameter. By default Item_param's result type in a prepared statement is set to STRING_RESULT. This can change during the execute phase depending on the user variable. But since the execute phase creates its own temporary table, it will be handled separately. This is a backport of the fix for BUG#22392374. 
--- sql/sql_select.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b5ecebdadc8..fb705e9ba6a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -10240,6 +10240,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, case Item::REF_ITEM: case Item::NULL_ITEM: case Item::VARBIN_ITEM: + case Item::PARAM_ITEM: if (make_copy_field) { DBUG_ASSERT(((Item_result_field*)item)->result_field); From 322afb2c0a6bc7c8ffb365c61b90392bfe4b4f31 Mon Sep 17 00:00:00 2001 From: Kailasnath Nagarkar Date: Wed, 3 Aug 2016 12:54:58 +0530 Subject: [PATCH 11/96] Bug #19984392 : MEDIUMINT: STACK BUFFER OVERFLOW IN PROTOCOL_TEXT::STORE_LONG ISSUE: Queries with mediumint as column when operated with long long type of data results in buffer overflow in store_long function. The merging rule specified for (MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24) is MYSQL_TYPE_LONG. Due to this store_long function was getting called which resulted in buffer overflow. SOLUTION: The correct merging rule for (MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24) should be MYSQL_TYPE_LONGLONG. So, instead of function store_long, function store_longlong is called which correctly handles the type MYSQL_TYPE_LONGLONG. External Bug #23645238 is a duplicate of this issue. --- sql/field.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/field.cc b/sql/field.cc index abe856a8292..d9889f0fb48 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -347,7 +347,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR From 194776ce00f6fea37551ea25584798f78b0ad24b Mon Sep 17 00:00:00 2001 From: Kailasnath Nagarkar Date: Thu, 4 Aug 2016 12:49:50 +0530 Subject: [PATCH 12/96] Bug #19984392 : MEDIUMINT: STACK BUFFER OVERFLOW IN PROTOCOL_TEXT::STORE_LONG Reverting the patch due to some issues. --- sql/field.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/field.cc b/sql/field.cc index d9889f0fb48..3ca072e7771 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -347,7 +347,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR From 22eec68941f3acbd9033e7fb33d10c63e6b388da Mon Sep 17 00:00:00 2001 From: Neha Kumari Date: Fri, 5 Aug 2016 12:17:11 +0530 Subject: [PATCH 13/96] Bug#23540182:MYSQLBINLOG DOES NOT FREE THE EXISTING CONNECTION BEFORE OPENING NEW REMOTE ONE It happens when you are trying to read two or more log files from a remote server using mysqlbinlog utility. The reason for this is no matching mysql_close() that concludes the life time of 'mysql' struct describing connection to the server. 
This happens when mysqlbinlog is invoked with connecting to the server and requesting more than one binlog file. In such case dump_remote_log_entries() keeps calling safe_connect() per eachfile, never caring to invoke mysql_close(). Only the final safe_connect()'s allocation effect are cleaned by the base code. That is with 2 files there's one 'mysql' connection descriptor struct uncleaned/deallocated. We are backporting the bug 21255763 (pushed in mysql-trunk) in the earlier version of MySQL starting from 5.5 to 5.7. which was pushed in mysql-trunk. Fix: Invoke mysql_close() just before mysql_init() in safe_connect() defined in mysqlbinlog.cc. That makes possibly previously used 'mysql' be reclaimed prior a new one is allocated. --- client/mysqlbinlog.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 73a801c4b21..955d9e3fb3c 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1444,6 +1444,12 @@ static int parse_args(int *argc, char*** argv) */ static Exit_status safe_connect() { + /* + A possible old connection's resources are reclaimed now + at new connect attempt. The final safe_connect resources + are mysql_closed at the end of program, explicitly. 
+ */ + mysql_close(mysql); mysql= mysql_init(NULL); if (!mysql) From 0c6eac64c7d63d1fdf6fa78724b817f03e5d7454 Mon Sep 17 00:00:00 2001 From: Balasubramanian Kandasamy Date: Mon, 8 Aug 2016 15:15:17 +0530 Subject: [PATCH 14/96] Raise version number after cloning 5.5.52 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index acabf9b42d0..d44c8b28006 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=52 +MYSQL_VERSION_PATCH=53 MYSQL_VERSION_EXTRA= From 737964dcd12e61ae7fb4b47505158e2fecce9f2b Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Wed, 10 Aug 2016 11:24:18 -0400 Subject: [PATCH 15/96] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index db9d497c141..acabf9b42d0 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=51 +MYSQL_VERSION_PATCH=52 MYSQL_VERSION_EXTRA= From 09cb64682bda8648b0cdad606b169b4d592f839d Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Thu, 11 Aug 2016 19:35:53 +0000 Subject: [PATCH 16/96] Windows : fix search for WiX root directory when using 64bit cmake "C:\Program Files (x86)" directory needs to be checked as well in this case. 
--- win/packaging/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt index 0535a486d57..1682bae6986 100644 --- a/win/packaging/CMakeLists.txt +++ b/win/packaging/CMakeLists.txt @@ -24,10 +24,13 @@ ENDIF() SET(MANUFACTURER "MariaDB Corporation Ab") -FIND_PATH(WIX_DIR heat.exe - "$ENV{ProgramFiles}/WiX Toolset v3.9/bin" - "$ENV{ProgramFiles}/WiX Toolset v3.10/bin" -) +SET(WIX_BIN_PATHS) +FOREACH(WIX_VER 3.9 3.10 3.11) + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin") + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin") +ENDFOREACH() + +FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS}) SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB") IF(CMAKE_SIZEOF_VOID_P EQUAL 4) SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3") From 723488bba162109f241bc764b6e33c6f3d8b39d6 Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Thu, 4 Aug 2016 15:43:52 +0400 Subject: [PATCH 17/96] MDEV-10424 - Assertion `ticket == __null' failed in MDL_request::set_type Reexecution of prepared "ANALYZE TABLE merge_table, table" may miss to reinitialize "table" for subsequent execution and trigger assertion failure. This happens because MERGE engine may adjust table->next_global chain, which gets cleared by close_thread_tables()/ha_myisammrg::detach_children() later. Since reinitilization iterates next_global chain, it won't see tables following merge table. Fixed by appending saved next_global chain after merge children. --- mysql-test/r/merge.result | 17 +++++++++++++++++ mysql-test/t/merge.test | 13 +++++++++++++ sql/sql_admin.cc | 14 +++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result index 41ee148cee3..66ba6cea70e 100644 --- a/mysql-test/r/merge.result +++ b/mysql-test/r/merge.result @@ -3832,4 +3832,21 @@ test.m1 repair error Corrupt # Clean-up. 
drop tables m1, t1, t4; drop view t3; +# +# MDEV-10424 - Assertion `ticket == __null' failed in +# MDL_request::set_type +# +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; End of 5.5 tests diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test index 6573c2b09c0..9d0ddd01752 100644 --- a/mysql-test/t/merge.test +++ b/mysql-test/t/merge.test @@ -2880,6 +2880,19 @@ drop tables m1, t1, t4; drop view t3; +--echo # +--echo # MDEV-10424 - Assertion `ticket == __null' failed in +--echo # MDL_request::set_type +--echo # +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; + + --echo End of 5.5 tests --disable_result_log diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 12a59fa6ee8..55effcd7002 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -441,7 +441,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } thd->prepare_derived_at_open= FALSE; - table->next_global= save_next_global; + /* + MERGE engine may adjust table->next_global chain, thus we have to + append save_next_global after merge children. 
+ */ + if (save_next_global) + { + TABLE_LIST *table_list_iterator= table; + while (table_list_iterator->next_global) + table_list_iterator= table_list_iterator->next_global; + table_list_iterator->next_global= save_next_global; + save_next_global->prev_global= &table_list_iterator->next_global; + } + table->next_local= save_next_local; thd->open_options&= ~extra_open_options; From a92a8cc817649df80fd84b6a466da345772660fb Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Fri, 19 Aug 2016 17:11:20 +0000 Subject: [PATCH 18/96] Windows packaging : use /d switch to sign MSI, to prevent installer showing randomly generated name in UAC prompt --- win/packaging/create_msi.cmake.in | 1 + 1 file changed, 1 insertion(+) diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in index a8ca35906e8..fceea041676 100644 --- a/win/packaging/create_msi.cmake.in +++ b/win/packaging/create_msi.cmake.in @@ -430,6 +430,7 @@ EXECUTE_PROCESS( IF(SIGNCODE) EXECUTE_PROCESS( COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS} + /d ${CPACK_PACKAGE_FILE_NAME}.msi ${CPACK_PACKAGE_FILE_NAME}.msi ) ENDIF() From ee97274ca7d9ea8d8f00e40476a039c35399ee15 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 25 Aug 2016 09:50:04 +0300 Subject: [PATCH 19/96] DEV-10595 MariaDB daemon leaks memory with specific query The issue was that in some extreme cases when doing GROUP BY, buffers for temporary blobs where not properly cleared. 
--- mysql-test/r/group_min_max_innodb.result | 16 +++++++++++++ mysql-test/t/group_min_max_innodb.test | 13 +++++++++++ sql/sql_class.h | 5 ++++ sql/sql_select.cc | 29 +++++++++++++++++++----- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result index c4d2fb88784..1e3ee793a7f 100644 --- a/mysql-test/r/group_min_max_innodb.result +++ b/mysql-test/r/group_min_max_innodb.result @@ -286,3 +286,19 @@ F 28 28 F 29 29 F 30 30 DROP TABLE t0,t1,t2; +# +# MDEV-MariaDB daemon leaks memory with specific query +# +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, +`language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', +`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; +translation_resources serialized_c +NULL 
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +NULL 
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +NULL 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +NULL 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +drop table t1,t2; diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test index 6967f847147..91e0bd3279f 100644 --- a/mysql-test/t/group_min_max_innodb.test +++ b/mysql-test/t/group_min_max_innodb.test @@ -230,3 +230,16 @@ eval EXPLAIN $query; eval $query; DROP TABLE t0,t1,t2; + +--echo # +--echo # MDEV-MariaDB daemon leaks memory with specific query +--echo # + +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, + `language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', + `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` 
ORDER BY 10-d.voter_id+RAND()*0; +drop table t1,t2; diff --git a/sql/sql_class.h b/sql/sql_class.h index d24dad7c2ca..da83382d5e9 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3607,6 +3607,11 @@ public: save_copy_field_end= copy_field_end= NULL; } } + void free_copy_field_data() + { + for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++) + ptr->tmp.free(); + } }; class select_union :public select_result_interceptor diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 613cbb2e086..121805dd0e2 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8275,9 +8275,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) We need to destruct the copy_field (allocated in create_tmp_table()) before setting it to 0 if the join is not "reusable". */ - if (!tmp_join || tmp_join != this) - tmp_table_param.cleanup(); - tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + if (!tmp_join || tmp_join != this) + tmp_table_param.cleanup(); + else + { + /* + Free data buffered in copy_fields, but keep data pointed by copy_field + around for next iteration (possibly stored in save_copy_fields). + + It would be logically simpler to not clear copy_field + below, but as we have loops that runs over copy_field to + copy_field_end that should not be done anymore, it's simpler to + just clear the pointers. + + Another option would be to just clear copy_field_end and not run + the loops if this is not set or to have tmp_table_param.cleanup() + to run cleanup on save_copy_field if copy_field is not set. + */ + tmp_table_param.free_copy_field_data(); + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + } first_record= sort_and_group=0; send_records= (ha_rows) 0; @@ -10866,7 +10883,7 @@ void JOIN::join_free() /** Free resources of given join. 
- @param fill true if we should free all resources, call with full==1 + @param full true if we should free all resources, call with full==1 should be last, before it this function can be called with full==0 @@ -10982,7 +10999,7 @@ void JOIN::cleanup(bool full) /* If we have tmp_join and 'this' JOIN is not tmp_join and tmp_table_param.copy_field's of them are equal then we have to remove - pointer to tmp_table_param.copy_field from tmp_join, because it qill + pointer to tmp_table_param.copy_field from tmp_join, because it will be removed in tmp_table_param.cleanup(). */ if (tmp_join && @@ -21397,7 +21414,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, err: if (copy) delete [] param->copy_field; // This is never 0 - param->copy_field=0; + param->copy_field= 0; err2: DBUG_RETURN(TRUE); } From 684a165f28b3718160a3e4c5ebd18a465d85e97c Mon Sep 17 00:00:00 2001 From: Terje Rosten Date: Fri, 12 Aug 2016 12:38:20 +0200 Subject: [PATCH 20/96] Bug#24464380 PRIVILEGE ESCALATION USING MYSQLD_SAFE Argument to malloc-lib must be included in restricted list of directories, symlink guards added, and mysqld and mysqld-version options restricted to command line only. Don't redirect errors to stderr. --- packaging/rpm-oel/mysql.init | 2 +- packaging/rpm-sles/mysql.init | 2 +- scripts/mysqld_safe.sh | 79 +++++++++++++++++++++-------------- support-files/mysql.server.sh | 2 +- 4 files changed, 50 insertions(+), 35 deletions(-) diff --git a/packaging/rpm-oel/mysql.init b/packaging/rpm-oel/mysql.init index 262d0582f68..aaea498d153 100644 --- a/packaging/rpm-oel/mysql.init +++ b/packaging/rpm-oel/mysql.init @@ -102,7 +102,7 @@ start(){ # alarms, per bug #547485 $exec --datadir="$datadir" --socket="$socketfile" \ --pid-file="$mypidfile" \ - --basedir=/usr --user=mysql >/dev/null 2>&1 & + --basedir=/usr --user=mysql >/dev/null & safe_pid=$! # Spin for a maximum of N seconds waiting for the server to come up; # exit the loop immediately if mysqld_safe process disappears. 
diff --git a/packaging/rpm-sles/mysql.init b/packaging/rpm-sles/mysql.init index 50ca4c9033c..dda0bebba56 100644 --- a/packaging/rpm-sles/mysql.init +++ b/packaging/rpm-sles/mysql.init @@ -137,7 +137,7 @@ start () { rc_failed 6 ; rc_status -v ; rc_exit fi - $PROG --basedir=/usr --datadir="$datadir" --pid-file="$pidfile" >/dev/null 2>&1 & + $PROG --basedir=/usr --datadir="$datadir" --pid-file="$pidfile" >/dev/null & if pinger $! ; then echo -n "Starting service MySQL:" touch $lockfile diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index f705953059e..11b692ec928 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -208,8 +208,17 @@ parse_arguments() { --core-file-size=*) core_file_size="$val" ;; --ledir=*) ledir="$val" ;; --malloc-lib=*) set_malloc_lib "$val" ;; - --mysqld=*) MYSQLD="$val" ;; + --mysqld=*) + if [ -z "$pick_args" ]; then + log_error "--mysqld option can only be used as command line option, found in config file" + exit 1 + fi + MYSQLD="$val" ;; --mysqld-version=*) + if [ -z "$pick_args" ]; then + log_error "--mysqld-version option can only be used as command line option, found in config file" + exit 1 + fi if test -n "$val" then MYSQLD="mysqld-$val" @@ -297,38 +306,22 @@ mysqld_ld_preload_text() { echo "$text" } - -mysql_config= -get_mysql_config() { - if [ -z "$mysql_config" ]; then - mysql_config=`echo "$0" | sed 's,/[^/][^/]*$,/mysql_config,'` - if [ ! -x "$mysql_config" ]; then - log_error "Can not run mysql_config $@ from '$mysql_config'" - exit 1 - fi - fi - - "$mysql_config" "$@" -} - - # set_malloc_lib LIB # - If LIB is empty, do nothing and return -# - If LIB is 'tcmalloc', look for tcmalloc shared library in /usr/lib -# then pkglibdir. tcmalloc is part of the Google perftools project. +# - If LIB is 'tcmalloc', look for tcmalloc shared library in $malloc_dirs. +# tcmalloc is part of the Google perftools project. 
# - If LIB is an absolute path, assume it is a malloc shared library # # Put LIB in mysqld_ld_preload, which will be added to LD_PRELOAD when # running mysqld. See ld.so for details. set_malloc_lib() { + # This list is kept intentionally simple. + malloc_dirs="/usr/lib /usr/lib64 /usr/lib/i386-linux-gnu /usr/lib/x86_64-linux-gnu" malloc_lib="$1" if [ "$malloc_lib" = tcmalloc ]; then - pkglibdir=`get_mysql_config --variable=pkglibdir` malloc_lib= - # This list is kept intentionally simple. Simply set --malloc-lib - # to a full path if another location is desired. - for libdir in /usr/lib "$pkglibdir" "$pkglibdir/mysql"; do + for libdir in $(echo $malloc_dirs); do for flavor in _minimal '' _and_profiler _debug; do tmp="$libdir/libtcmalloc$flavor.so" #log_notice "DEBUG: Checking for malloc lib '$tmp'" @@ -339,7 +332,7 @@ set_malloc_lib() { done if [ -z "$malloc_lib" ]; then - log_error "no shared library for --malloc-lib=tcmalloc found in /usr/lib or $pkglibdir" + log_error "no shared library for --malloc-lib=tcmalloc found in $malloc_dirs" exit 1 fi fi @@ -350,9 +343,21 @@ set_malloc_lib() { case "$malloc_lib" in /*) if [ ! -r "$malloc_lib" ]; then - log_error "--malloc-lib '$malloc_lib' can not be read and will not be used" + log_error "--malloc-lib can not be read and will not be used" exit 1 fi + + # Restrict to a the list in $malloc_dirs above + case "$(dirname "$malloc_lib")" in + /usr/lib) ;; + /usr/lib64) ;; + /usr/lib/i386-linux-gnu) ;; + /usr/lib/x86_64-linux-gnu) ;; + *) + log_error "--malloc-lib must be located in one of the directories: $malloc_dirs" + exit 1 + ;; + esac ;; *) log_error "--malloc-lib must be an absolute path or 'tcmalloc'; " \ @@ -569,7 +574,7 @@ then log_notice "Logging to '$err_log'." logging=file - if [ ! -f "$err_log" ]; then # if error log already exists, + if [ ! -f "$err_log" -a ! -h "$err_log" ]; then # if error log already exists, touch "$err_log" # we just append. otherwise, chmod "$fmode" "$err_log" # fix the permissions here! 
fi @@ -594,7 +599,7 @@ then USER_OPTION="--user=$user" fi # Change the err log to the right user, if it is in use - if [ $want_syslog -eq 0 ]; then + if [ $want_syslog -eq 0 -a ! -h "$err_log" ]; then touch "$err_log" chown $user "$err_log" fi @@ -614,9 +619,11 @@ safe_mysql_unix_port=${mysql_unix_port:-${MYSQL_UNIX_PORT:-@MYSQL_UNIX_ADDR@}} mysql_unix_port_dir=`dirname $safe_mysql_unix_port` if [ ! -d $mysql_unix_port_dir ] then - mkdir $mysql_unix_port_dir - chown $user $mysql_unix_port_dir - chmod 755 $mysql_unix_port_dir + if [ ! -h $mysql_unix_port_dir ]; then + mkdir $mysql_unix_port_dir + chown $user $mysql_unix_port_dir + chmod 755 $mysql_unix_port_dir + fi fi # If the user doesn't specify a binary, we assume name "mysqld" @@ -728,7 +735,9 @@ then exit 1 fi fi - rm -f "$pid_file" + if [ ! -h "$pid_file" ]; then + rm -f "$pid_file" + fi if test -f "$pid_file" then log_error "Fatal error: Can't remove the pid file: @@ -779,13 +788,19 @@ have_sleep=1 while true do - rm -f $safe_mysql_unix_port "$pid_file" # Some extra safety + # Some extra safety + if [ ! -h "$safe_mysql_unix_port" ]; then + rm -f "$safe_mysql_unix_port" + fi + if [ ! -h "$pid_file" ]; then + rm -f "$pid_file" + fi start_time=`date +%M%S` eval_log_error "$cmd" - if [ $want_syslog -eq 0 -a ! -f "$err_log" ]; then + if [ $want_syslog -eq 0 -a ! -f "$err_log" -a ! -h "$err_log" ]; then touch "$err_log" # hypothetical: log was renamed but not chown $user "$err_log" # flushed yet. we'd recreate it with chmod "$fmode" "$err_log" # wrong owner next time we log, so set diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index 7487d5acc0f..909d33f8770 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -280,7 +280,7 @@ case "$mode" in then # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. 
- $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null 2>&1 & + $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null & wait_for_pid created "$!" "$mysqld_pid_file_path"; return_value=$? # Make lock for RedHat / SuSE From 4e5473862e6852b0f3802b0cd0c6fa10b5253291 Mon Sep 17 00:00:00 2001 From: Jon Olav Hauglid Date: Tue, 16 Aug 2016 15:35:19 +0200 Subject: [PATCH 21/96] Bug#24388746: PRIVILEGE ESCALATION AND RACE CONDITION USING CREATE TABLE During REPAIR TABLE of a MyISAM table, a temporary data file (.TMD) is created. When repair finishes, this file is renamed to the original .MYD file. The problem was that during this rename, we copied the stats from the old file to the new file with chmod/chown. If a user managed to replace the temporary file before chmod/chown was executed, it was possible to get an arbitrary file with the privileges of the mysql user. This patch fixes the problem by not copying stats from the old file to the new file. This is not needed as the new file was created with the correct stats. This fix only changes server behavior - external utilities such as myisamchk still does chmod/chown. No test case provided since the problem involves synchronization with file system operations. --- include/my_sys.h | 3 ++- include/myisam.h | 11 +++++----- mysys/my_redel.c | 12 ++++++++--- storage/myisam/ha_myisam.cc | 26 ++++++++++++++++++----- storage/myisam/mi_check.c | 41 ++++++++++++++++++++++++++----------- storage/myisam/myisamchk.c | 16 +++++++++------ 6 files changed, 77 insertions(+), 32 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index b1b8bf15be3..472c2ba5ca0 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -83,6 +83,7 @@ typedef struct my_aio_result { #define MY_RESOLVE_LINK 128 /* my_realpath(); Only resolve links */ #define MY_HOLD_ORIGINAL_MODES 128 /* my_copy() holds to file modes */ #define MY_REDEL_MAKE_BACKUP 256 +#define MY_REDEL_NO_COPY_STAT 512 /* my_redel() doesn't call my_copystat() */ #define MY_SEEK_NOT_DONE 32 /* my_lock may have to do a seek */ #define MY_DONT_WAIT 64 /* my_lock() don't wait if can't lock */ #define MY_ZEROFILL 32 /* my_malloc(), fill array with zero */ diff --git a/include/myisam.h b/include/myisam.h index 85d37a81bc6..a9fcd7e4369 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -426,12 +426,13 @@ int chk_size(MI_CHECK *param, MI_INFO *info); int chk_key(MI_CHECK *param, MI_INFO *info); int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend); int mi_repair(MI_CHECK *param, register MI_INFO *info, - char * name, int rep_quick); -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name); + char * name, int rep_quick, my_bool no_copy_stat); +int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name, + my_bool no_copy_stat); int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, - const char * name, int rep_quick); + const char * name, int rep_quick, my_bool no_copy_stat); int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, - const char * name, int rep_quick); + const char * name, int rep_quick, my_bool no_copy_stat); int change_to_newfile(const char * filename, const char * old_ext, const char * new_ext, myf myflags); int 
lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, diff --git a/mysys/my_redel.c b/mysys/my_redel.c index a47df8265c8..25391cd4e8f 100644 --- a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,9 @@ struct utimbuf { if MY_REDEL_MAKE_COPY is given, then the orginal file is renamed to org_name-'current_time'.BAK + + if MY_REDEL_NO_COPY_STAT is given, stats are not copied + from org_name to tmp_name. */ #define REDEL_EXT ".BAK" @@ -46,8 +49,11 @@ int my_redel(const char *org_name, const char *tmp_name, myf MyFlags) DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %d", org_name,tmp_name,MyFlags)); - if (my_copystat(org_name,tmp_name,MyFlags) < 0) - goto end; + if (!(MyFlags & MY_REDEL_NO_COPY_STAT)) + { + if (my_copystat(org_name,tmp_name,MyFlags) < 0) + goto end; + } if (MyFlags & MY_REDEL_MAKE_BACKUP) { char name_buff[FN_REFLEN+20]; diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 602a0ae6cc1..21cbef32188 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1091,24 +1091,36 @@ int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool do_optimize) /* TODO: respect myisam_repair_threads variable */ my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map)); thd_proc_info(thd, buf); + /* + The new file is created with the right stats, so we can skip + copying file stats from old to new. + */ error = mi_repair_parallel(¶m, file, fixed_name, - param.testflag & T_QUICK); + param.testflag & T_QUICK, TRUE); thd_proc_info(thd, "Repair done"); // to reset proc_info, as // it was pointing to local buffer } else { thd_proc_info(thd, "Repair by sorting"); + /* + The new file is created with the right stats, so we can skip + copying file stats from old to new. + */ error = mi_repair_by_sort(¶m, file, fixed_name, - param.testflag & T_QUICK); + param.testflag & T_QUICK, TRUE); } } else { thd_proc_info(thd, "Repair with keycache"); param.testflag &= ~T_REP_BY_SORT; + /* + The new file is created with the right stats, so we can skip + copying file stats from old to new. + */ error= mi_repair(¶m, file, fixed_name, - param.testflag & T_QUICK); + param.testflag & T_QUICK, TRUE); } #ifdef HAVE_MMAP if (remap) @@ -1124,7 +1136,11 @@ int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool do_optimize) { optimize_done=1; thd_proc_info(thd, "Sorting index"); - error=mi_sort_index(¶m,file,fixed_name); + /* + The new file is created with the right stats, so we can skip + copying file stats from old to new. + */ + error=mi_sort_index(¶m,file,fixed_name, TRUE); } if (!statistics_done && (local_testflag & T_STATISTICS)) { diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index ba1f975549a..fe0d4c9c30b 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 
+ Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1512,7 +1512,7 @@ static int mi_drop_all_indexes(MI_CHECK *param, MI_INFO *info, my_bool force) /* Save new datafile-name in temp_filename */ int mi_repair(MI_CHECK *param, register MI_INFO *info, - char * name, int rep_quick) + char * name, int rep_quick, my_bool no_copy_stat) { int error,got_error; ha_rows start_records,new_header_length; @@ -1726,6 +1726,11 @@ err: /* Replace the actual file with the temporary file */ if (new_file >= 0) { + myf flags= 0; + if (param->testflag & T_BACKUP_DATA) + flags |= MY_REDEL_MAKE_BACKUP; + if (no_copy_stat) + flags |= MY_REDEL_NO_COPY_STAT; mysql_file_close(new_file, MYF(0)); info->dfile=new_file= -1; /* @@ -1744,8 +1749,7 @@ err: info->s->file_map= NULL; } if (change_to_newfile(share->data_file_name, MI_NAME_DEXT, DATA_TMP_EXT, - (param->testflag & T_BACKUP_DATA ? - MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + flags) || mi_open_datafile(info,share,name,-1)) got_error=1; @@ -1933,7 +1937,8 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) /* Sort index for more efficent reads */ -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name) +int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name, + my_bool no_copy_stat) { reg2 uint key; reg1 MI_KEYDEF *keyinfo; @@ -2004,7 +2009,7 @@ int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name) share->kfile = -1; (void) mysql_file_close(new_file, MYF(MY_WME)); if (change_to_newfile(share->index_file_name, MI_NAME_IEXT, INDEX_TMP_EXT, - MYF(0)) || + no_copy_stat ? 
MYF(MY_REDEL_NO_COPY_STAT) : MYF(0)) || mi_open_keyfile(share)) goto err2; info->lock_type= F_UNLCK; /* Force mi_readinfo to lock */ @@ -2209,6 +2214,8 @@ err: info MyISAM handler to repair name Name of table (for warnings) rep_quick set to <> 0 if we should not change data file + no_copy_stat Don't copy file stats from old to new file, + assume that new file was created with correct stats RESULT 0 ok @@ -2216,7 +2223,7 @@ err: */ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, - const char * name, int rep_quick) + const char * name, int rep_quick, my_bool no_copy_stat) { int got_error; uint i; @@ -2543,11 +2550,15 @@ err: /* Replace the actual file with the temporary file */ if (new_file >= 0) { + myf flags= 0; + if (param->testflag & T_BACKUP_DATA) + flags |= MY_REDEL_MAKE_BACKUP; + if (no_copy_stat) + flags |= MY_REDEL_NO_COPY_STAT; mysql_file_close(new_file, MYF(0)); info->dfile=new_file= -1; if (change_to_newfile(share->data_file_name,MI_NAME_DEXT, DATA_TMP_EXT, - (param->testflag & T_BACKUP_DATA ? 
- MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + flags) || mi_open_datafile(info,share,name,-1)) got_error=1; } @@ -2595,6 +2606,8 @@ err: info MyISAM handler to repair name Name of table (for warnings) rep_quick set to <> 0 if we should not change data file + no_copy_stat Don't copy file stats from old to new file, + assume that new file was created with correct stats DESCRIPTION Same as mi_repair_by_sort but do it multithreaded @@ -2629,7 +2642,7 @@ err: */ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, - const char * name, int rep_quick) + const char * name, int rep_quick, my_bool no_copy_stat) { int got_error; uint i,key, total_key_length, istep; @@ -3076,11 +3089,15 @@ err: /* Replace the actual file with the temporary file */ if (new_file >= 0) { + myf flags= 0; + if (param->testflag & T_BACKUP_DATA) + flags |= MY_REDEL_MAKE_BACKUP; + if (no_copy_stat) + flags |= MY_REDEL_NO_COPY_STAT; mysql_file_close(new_file, MYF(0)); info->dfile=new_file= -1; if (change_to_newfile(share->data_file_name, MI_NAME_DEXT, DATA_TMP_EXT, - (param->testflag & T_BACKUP_DATA ? - MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + flags) || mi_open_datafile(info,share,name,-1)) got_error=1; } diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index 8606bd7c748..9360a054872 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -993,14 +993,18 @@ static int myisamchk(MI_CHECK *param, char * filename) info->s->state.key_map, param->force_sort)) { + /* + The new file might not be created with the right stats depending + on how myisamchk is run, so we must copy file stats from old to new. 
+ */ if (param->testflag & T_REP_BY_SORT) - error=mi_repair_by_sort(param,info,filename,rep_quick); + error= mi_repair_by_sort(param, info, filename, rep_quick, FALSE); else - error=mi_repair_parallel(param,info,filename,rep_quick); + error= mi_repair_parallel(param, info, filename, rep_quick, FALSE); state_updated=1; } else if (param->testflag & T_REP_ANY) - error=mi_repair(param, info,filename,rep_quick); + error= mi_repair(param, info, filename, rep_quick, FALSE); } if (!error && param->testflag & T_SORT_RECORDS) { @@ -1040,12 +1044,12 @@ static int myisamchk(MI_CHECK *param, char * filename) { if (param->verbose) puts("Table had a compressed index; We must now recreate the index"); - error=mi_repair_by_sort(param,info,filename,1); + error= mi_repair_by_sort(param, info, filename, 1, FALSE); } } } if (!error && param->testflag & T_SORT_INDEX) - error=mi_sort_index(param,info,filename); + error= mi_sort_index(param, info, filename, FALSE); if (!error) share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | STATE_CRASHED_ON_REPAIR); From 48bd8b16fe382be302c6f0b45931be5aa6f29a0e Mon Sep 17 00:00:00 2001 From: Sivert Sorumgard Date: Mon, 22 Aug 2016 14:30:02 +0200 Subject: [PATCH 22/96] Bug#24388753: PRIVILEGE ESCALATION USING MYSQLD_SAFE [This is the 5.5/5.6 version of the bugfix]. The problem was that it was possible to write log files ending in .ini/.cnf that later could be parsed as an options file. This made it possible for users to specify startup options without the permissions to do so. This patch fixes the problem by disallowing general query log and slow query log to be written to files ending in .ini and .cnf. 
--- sql/log.cc | 89 +++++++++++++++++++++++++++++++++++++++++++++++-- sql/log.h | 10 ++++++ sql/mysqld.cc | 18 +++++++++- sql/sys_vars.cc | 25 +++++++++----- 4 files changed, 131 insertions(+), 11 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 50d7762af6d..493aae8f2ff 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2293,6 +2293,77 @@ bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name, } +bool is_valid_log_name(const char *name, size_t len) +{ + if (len > 3) + { + const char *tail= name + len - 4; + if (my_strcasecmp(system_charset_info, tail, ".ini") == 0 || + my_strcasecmp(system_charset_info, tail, ".cnf") == 0) + { + return false; + } + } + return true; +} + + +/** + Get the real log file name, and possibly reopen file. + + Use realpath() to get the path with symbolic links + expanded. Then, close the file, and reopen the real path using the + O_NOFOLLOW flag. This will reject following symbolic links. + + @param file File descriptor. + @param log_file_key Key for P_S instrumentation. + @param open_flags Flags to use for opening the file. + @param opened_file_name Name of the open fd. + + @retval file descriptor to open file with 'real_file_name', or '-1' + in case of errors. +*/ + +#ifndef _WIN32 +static File mysql_file_real_name_reopen(File file, +#ifdef HAVE_PSI_INTERFACE + PSI_file_key log_file_key, +#endif + int open_flags, + const char *opened_file_name) +{ + DBUG_ASSERT(file); + DBUG_ASSERT(opened_file_name); + + /* Buffer for realpath must have capacity for PATH_MAX. */ + char real_file_name[PATH_MAX]; + + /* Get realpath, validate, open realpath with O_NOFOLLOW. 
*/ + if (realpath(opened_file_name, real_file_name) == NULL) + { + (void) mysql_file_close(file, MYF(0)); + return -1; + } + + if (mysql_file_close(file, MYF(0))) + return -1; + + if (strlen(real_file_name) > FN_REFLEN) + return -1; + + if (!is_valid_log_name(real_file_name, strlen(real_file_name))) + { + sql_print_error("Invalid log file name after expanding symlinks: '%s'", + real_file_name); + return -1; + } + + return mysql_file_open(log_file_key, real_file_name, + open_flags | O_NOFOLLOW, + MYF(MY_WME | ME_WAITTANG)); +} +#endif // _WIN32 + /* Open a (new) log file. @@ -2358,8 +2429,22 @@ bool MYSQL_LOG::open( if ((file= mysql_file_open(log_file_key, log_file_name, open_flags, - MYF(MY_WME | ME_WAITTANG))) < 0 || - init_io_cache(&log_file, file, IO_SIZE, io_cache_type, + MYF(MY_WME | ME_WAITTANG))) < 0) + goto err; + +#ifndef _WIN32 + /* Reopen and validate path. */ + if ((log_type_arg == LOG_UNKNOWN || log_type_arg == LOG_NORMAL) && + (file= mysql_file_real_name_reopen(file, +#ifdef HAVE_PSI_INTERFACE + log_file_key, +#endif + open_flags, + log_file_name)) < 0) + goto err; +#endif // _WIN32 + + if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, mysql_file_tell(file, MYF(MY_WME)), 0, MYF(MY_WME | MY_NABP | ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0)))) diff --git a/sql/log.h b/sql/log.h index b5e751386a6..d3ecba41964 100644 --- a/sql/log.h +++ b/sql/log.h @@ -717,6 +717,16 @@ File open_binlog(IO_CACHE *log, const char *log_file_name, char *make_log_name(char *buff, const char *name, const char* log_ext); +/** + Check given log name against certain blacklisted names/extensions. + + @param name Log name to check + @param len Length of log name + + @returns true if name is valid, false otherwise. 
+*/ +bool is_valid_log_name(const char *name, size_t len); + extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log; extern LOGGER logger; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a2532ceddd3..e979ea1b731 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify @@ -3512,6 +3512,22 @@ static int init_common_variables() "--log-slow-queries option, log tables are used. " "To enable logging to files use the --log-output=file option."); + if (opt_logname && + !is_valid_log_name(opt_logname, strlen(opt_logname))) + { + sql_print_error("Invalid value for --general_log_file: %s", + opt_logname); + return 1; + } + + if (opt_slow_logname && + !is_valid_log_name(opt_slow_logname, strlen(opt_slow_logname))) + { + sql_print_error("Invalid value for --slow_query_log_file: %s", + opt_slow_logname); + return 1; + } + #define FIX_LOG_VAR(VAR, ALT) \ if (!VAR || !*VAR) \ { \ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index b0fa7f9a341..d08cb4f8ca8 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2810,6 +2810,14 @@ static bool check_log_path(sys_var *self, THD *thd, set_var *var) if (!var->save_result.string_value.str) return true; + if (!is_valid_log_name(var->save_result.string_value.str, + var->save_result.string_value.length)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), + self->name.str, var->save_result.string_value.str); + return true; + } + if (var->save_result.string_value.length > FN_REFLEN) { // path is too long my_error(ER_PATH_LENGTH, MYF(0), self->name.str); @@ -2856,7 +2864,7 @@ static bool check_log_path(sys_var *self, THD *thd, set_var *var) return false; } static bool fix_log(char** logname, const char* default_logname, - const char*ext, bool enabled, void (*reopen)(char*)) + const char*ext, bool enabled, bool (*reopen)(char*)) { if (!*logname) // SET ... = DEFAULT { @@ -2868,16 +2876,17 @@ static bool fix_log(char** logname, const char* default_logname, } logger.lock_exclusive(); mysql_mutex_unlock(&LOCK_global_system_variables); + bool error= false; if (enabled) - reopen(*logname); + error= reopen(*logname); logger.unlock(); mysql_mutex_lock(&LOCK_global_system_variables); - return false; + return error; } -static void reopen_general_log(char* name) +static bool reopen_general_log(char* name) { logger.get_log_file_handler()->close(0); - logger.get_log_file_handler()->open_query_log(name); + return logger.get_log_file_handler()->open_query_log(name); } static bool fix_general_log_file(sys_var *self, THD *thd, enum_var_type type) { @@ -2890,10 +2899,10 @@ static Sys_var_charptr Sys_general_log_path( IN_FS_CHARSET, DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_log_path), ON_UPDATE(fix_general_log_file)); -static void reopen_slow_log(char* name) +static bool reopen_slow_log(char* name) { logger.get_slow_log_file_handler()->close(0); - 
logger.get_slow_log_file_handler()->open_slow_log(name); + return logger.get_slow_log_file_handler()->open_slow_log(name); } static bool fix_slow_log_file(sys_var *self, THD *thd, enum_var_type type) { From 39ec5ac403522ad452a5fe2b2839bd5d85e5ca8f Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Thu, 25 Aug 2016 11:55:54 -0400 Subject: [PATCH 23/96] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e748b1bda54..a82a4e4d77d 100644 --- a/VERSION +++ b/VERSION @@ -1,3 +1,3 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=0 -MYSQL_VERSION_PATCH=27 +MYSQL_VERSION_PATCH=28 From 754e7eff2872995e2b6e62f9da7448587a411c7b Mon Sep 17 00:00:00 2001 From: Terje Rosten Date: Fri, 26 Aug 2016 11:25:40 +0200 Subject: [PATCH 24/96] Bug#24464380 PRIVILEGE ESCALATION USING MYSQLD_SAFE Post push fix: Solaris 10 /bin/sh don't understand $(). --- scripts/mysqld_safe.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index 11b692ec928..1b30a3bb15b 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -321,7 +321,7 @@ set_malloc_lib() { if [ "$malloc_lib" = tcmalloc ]; then malloc_lib= - for libdir in $(echo $malloc_dirs); do + for libdir in `echo $malloc_dirs`; do for flavor in _minimal '' _and_profiler _debug; do tmp="$libdir/libtcmalloc$flavor.so" #log_notice "DEBUG: Checking for malloc lib '$tmp'" @@ -348,7 +348,7 @@ set_malloc_lib() { fi # Restrict to a the list in $malloc_dirs above - case "$(dirname "$malloc_lib")" in + case "`dirname "$malloc_lib"`" in /usr/lib) ;; /usr/lib64) ;; /usr/lib/i386-linux-gnu) ;; From f81f985f37ccdcf04aa9707fe994a0c87f67b1a8 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 29 Aug 2016 11:53:33 +0200 Subject: [PATCH 25/96] fix conpilation on OpenBSD --- sql/signal_handler.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc 
index fd6f62fa100..c3f25848e8a 100644 --- a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig) struct tm tm; #ifdef HAVE_STACKTRACE THD *thd; -#endif /* This flag remembers if the query pointer was found invalid. We will try and print the query at the end of the signal handler, in case we're wrong. */ bool print_invalid_query_pointer= false; +#endif if (segfaulted) { @@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) "\"mlockall\" bugs.\n"); } +#ifdef HAVE_STACKTRACE if (print_invalid_query_pointer) { my_safe_printf_stderr( @@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length())); my_safe_printf_stderr("\n\n"); } +#endif #ifdef HAVE_WRITE_CORE if (test_flags & TEST_CORE_ON_SIGNAL) From b9631e310b7cadf8711eef643e432d7e816680b4 Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Tue, 10 Nov 2015 12:41:26 +0100 Subject: [PATCH 26/96] MDEV-8833 Crash of server on prepared statement with conversion to semi-join Correct context chain made to allow outer fields pullout. 
--- mysql-test/r/ps.result | 33 ++++++++++++++++++++++++++++++++- mysql-test/t/ps.test | 29 ++++++++++++++++++++++++++++- sql/item.cc | 23 +++++++++++++++++++++-- 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result index 04a19d3840f..bb8b76faa49 100644 --- a/mysql-test/r/ps.result +++ b/mysql-test/r/ps.result @@ -4072,4 +4072,35 @@ id value deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; -# End of 10.0 tests +# +# MDEV-8833: Crash of server on prepared statement with +# conversion to semi-join +# +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +deallocate prepare stmt; +drop table t1,t2,t3,t4; +# End of 5.5 tests diff --git a/mysql-test/t/ps.test b/mysql-test/t/ps.test index 2ed5bb11bac..1516acca01e 100644 --- a/mysql-test/t/ps.test +++ b/mysql-test/t/ps.test @@ -3653,5 +3653,32 @@ deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; +--echo # +--echo # MDEV-8833: Crash of server on prepared statement with +--echo # conversion to semi-join +--echo # ---echo # End of 10.0 tests +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); + +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); + +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); + +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); + +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( 
SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +EXECUTE stmt; +deallocate prepare stmt; +drop table t1,t2,t3,t4; + + +--echo # End of 5.5 tests diff --git a/sql/item.cc b/sql/item.cc index 5861766371c..abcf48fc270 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2778,9 +2778,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref) if (context) { Name_resolution_context *ctx= new Name_resolution_context(); - ctx->outer_context= NULL; // We don't build a complete name resolver - ctx->table_list= NULL; // We rely on first_name_resolution_table instead + if (context->select_lex == new_parent) + { + /* + This field was pushed in then pulled out + (for example left part of IN) + */ + ctx->outer_context= context->outer_context; + } + else if (context->outer_context) + { + /* just pull to the upper context */ + ctx->outer_context= context->outer_context->outer_context; + } + else + { + /* No upper context (merging Derived/VIEW where context chain ends) */ + ctx->outer_context= NULL; + } + ctx->table_list= context->first_name_resolution_table; ctx->select_lex= new_parent; + if (context->select_lex == NULL) + ctx->select_lex= NULL; ctx->first_name_resolution_table= context->first_name_resolution_table; ctx->last_name_resolution_table= context->last_name_resolution_table; ctx->error_processor= context->error_processor; From a14f61ef749ad9f9ab2b0f5badf6754ba7443c9e Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 5 Sep 2016 12:28:35 +0300 Subject: [PATCH 27/96] MDEV-7142: main.index_merge_innodb fails sporadically in buildbot Attempt to stabilize the testcase. 
--- mysql-test/include/index_merge2.inc | 1 + mysql-test/r/index_merge_innodb.result | 3 +++ mysql-test/r/index_merge_myisam.result | 3 +++ 3 files changed, 7 insertions(+) diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc index c50a45a9923..03afa49d323 100644 --- a/mysql-test/include/index_merge2.inc +++ b/mysql-test/include/index_merge2.inc @@ -341,6 +341,7 @@ while ($1) alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; # to test the bug, the following must use "sort_union": --replace_column 9 REF diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result index 5202c79f3c7..5bf56e213ab 100644 --- a/mysql-test/r/index_merge_innodb.result +++ b/mysql-test/r/index_merge_innodb.result @@ -311,6 +311,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result index fcd5eebefa4..c63ed132662 100644 --- a/mysql-test/r/index_merge_myisam.result +++ b/mysql-test/r/index_merge_myisam.result @@ -1146,6 +1146,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where From 
213765cc222139c05c27774e0555cabeff7c3bbd Mon Sep 17 00:00:00 2001 From: Olivier Bertrand Date: Mon, 5 Sep 2016 13:18:04 +0200 Subject: [PATCH 28/96] - Fix MDEV-10496. Memory leak in discovery modified: storage/connect/ha_connect.cc modified: storage/connect/mycat.cc - Fix wrong lrecl calculation for virtual columns modified: storage/connect/reldef.cpp - Typo modified: storage/connect/jdbconn.cpp modified: storage/connect/json.cpp --- storage/connect/ha_connect.cc | 98 +++++++++-------- storage/connect/jdbconn.cpp | 195 +--------------------------------- storage/connect/json.cpp | 6 +- storage/connect/mycat.cc | 14 +-- storage/connect/reldef.cpp | 2 +- 5 files changed, 61 insertions(+), 254 deletions(-) diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc index ea6fb1b08c1..cf945a73f46 100644 --- a/storage/connect/ha_connect.cc +++ b/storage/connect/ha_connect.cc @@ -224,6 +224,7 @@ uint GetWorkSize(void); void SetWorkSize(uint); extern "C" const char *msglang(void); +static void PopUser(PCONNECT xp); static PCONNECT GetUser(THD *thd, PCONNECT xp); static PGLOBAL GetPlug(THD *thd, PCONNECT& lxp); @@ -831,42 +832,51 @@ ha_connect::~ha_connect(void) table ? table->s->table_name.str : "", xp, xp ? xp->count : 0); - if (xp) { - PCONNECT p; - - xp->count--; - - for (p= user_connect::to_users; p; p= p->next) - if (p == xp) - break; - - if (p && !p->count) { - if (p->next) - p->next->previous= p->previous; - - if (p->previous) - p->previous->next= p->next; - else - user_connect::to_users= p->next; - - } // endif p - - if (!xp->count) { - PlugCleanup(xp->g, true); - delete xp; - } // endif count - - } // endif xp - + PopUser(xp); } // end of ha_connect destructor +/****************************************************************************/ +/* Check whether this user can be removed. 
*/ +/****************************************************************************/ +static void PopUser(PCONNECT xp) +{ + if (xp) { + xp->count--; + + if (!xp->count) { + PCONNECT p; + + for (p= user_connect::to_users; p; p= p->next) + if (p == xp) + break; + + if (p) { + if (p->next) + p->next->previous= p->previous; + + if (p->previous) + p->previous->next= p->next; + else + user_connect::to_users= p->next; + + } // endif p + + PlugCleanup(xp->g, true); + delete xp; + } // endif count + + } // endif xp + +} // end of PopUser + + /****************************************************************************/ /* Get a pointer to the user of this handler. */ /****************************************************************************/ static PCONNECT GetUser(THD *thd, PCONNECT xp) { - if (!thd) + if (!thd) return NULL; if (xp && thd == xp->thdp) @@ -890,7 +900,6 @@ static PCONNECT GetUser(THD *thd, PCONNECT xp) return xp; } // end of GetUser - /****************************************************************************/ /* Get the global pointer of the user of this handler. */ /****************************************************************************/ @@ -5261,7 +5270,18 @@ static int connect_assisted_discovery(handlerton *, THD* thd, if (!(shm= (char*)db)) db= table_s->db.str; // Default value - // Check table type + // Save stack and allocation environment and prepare error return + if (g->jump_level == MAX_JUMP) { + strcpy(g->Message, MSG(TOO_MANY_JUMPS)); + goto jer; + } // endif jump_level + + if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) { + my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); + goto err; + } // endif rc + + // Check table type if (ttp == TAB_UNDEF) { topt->type= (src) ? "MYSQL" : (tab) ? 
"PROXY" : "DOS"; ttp= GetTypeID(topt->type); @@ -5270,20 +5290,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd, } else if (ttp == TAB_NIY) { sprintf(g->Message, "Unsupported table type %s", topt->type); my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); - return HA_ERR_INTERNAL_ERROR; + goto err; } // endif ttp - // Save stack and allocation environment and prepare error return - if (g->jump_level == MAX_JUMP) { - strcpy(g->Message, MSG(TOO_MANY_JUMPS)); - return HA_ERR_INTERNAL_ERROR; - } // endif jump_level - - if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) { - my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); - goto err; - } // endif rc - if (!tab) { if (ttp == TAB_TBL) { // Make tab the first table of the list @@ -5843,6 +5852,7 @@ static int connect_assisted_discovery(handlerton *, THD* thd, rc= init_table_share(thd, table_s, create_info, &sql); g->jump_level--; + PopUser(xp); return rc; } // endif ok @@ -5850,7 +5860,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd, err: g->jump_level--; - return HA_ERR_INTERNAL_ERROR; + jer: + PopUser(xp); + return HA_ERR_INTERNAL_ERROR; } // end of connect_assisted_discovery /** diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp index 3b8de3e975b..952847507a0 100644 --- a/storage/connect/jdbconn.cpp +++ b/storage/connect/jdbconn.cpp @@ -498,145 +498,6 @@ PQRYRES JDBCDrivers(PGLOBAL g, int maxres, bool info) return qrp; } // end of JDBCDrivers -#if 0 -/*************************************************************************/ -/* JDBCDataSources: constructs the result blocks containing all JDBC */ -/* data sources available on the local host. */ -/* Called with info=true to have result column names. 
*/ -/*************************************************************************/ -PQRYRES JDBCDataSources(PGLOBAL g, int maxres, bool info) -{ - int buftyp[] ={ TYPE_STRING, TYPE_STRING }; - XFLD fldtyp[] ={ FLD_NAME, FLD_REM }; - unsigned int length[] ={ 0, 256 }; - bool b[] ={ false, true }; - int i, n = 0, ncol = 2; - PCOLRES crp; - PQRYRES qrp; - JDBConn *jcp = NULL; - - /************************************************************************/ - /* Do an evaluation of the result size. */ - /************************************************************************/ - if (!info) { - jcp = new(g)JDBConn(g, NULL); - n = jcp->GetMaxValue(SQL_MAX_DSN_LENGTH); - length[0] = (n) ? (n + 1) : 256; - - if (!maxres) - maxres = 512; // Estimated max number of data sources - - } else { - length[0] = 256; - maxres = 0; - } // endif info - - if (trace) - htrc("JDBCDataSources: max=%d len=%d\n", maxres, length[0]); - - /************************************************************************/ - /* Allocate the structures used to refer to the result set. */ - /************************************************************************/ - qrp = PlgAllocResult(g, ncol, maxres, IDS_DSRC, - buftyp, fldtyp, length, false, true); - - for (i = 0, crp = qrp->Colresp; crp; i++, crp = crp->Next) - if (b[i]) - crp->Kdata->SetNullable(true); - - /************************************************************************/ - /* Now get the results into blocks. */ - /************************************************************************/ - if (!info && qrp && jcp->GetDataSources(qrp)) - qrp = NULL; - - /************************************************************************/ - /* Return the result pointer for use by GetData routines. 
*/ - /************************************************************************/ - return qrp; -} // end of JDBCDataSources - -/**************************************************************************/ -/* PrimaryKeys: constructs the result blocks containing all the */ -/* JDBC catalog information concerning primary keys. */ -/**************************************************************************/ -PQRYRES JDBCPrimaryKeys(PGLOBAL g, JDBConn *op, char *dsn, char *table) -{ - static int buftyp[] ={ TYPE_STRING, TYPE_STRING, TYPE_STRING, - TYPE_STRING, TYPE_SHORT, TYPE_STRING }; - static unsigned int length[] ={ 0, 0, 0, 0, 6, 128 }; - int n, ncol = 5; - int maxres; - PQRYRES qrp; - JCATPARM *cap; - JDBConn *jcp = op; - - if (!op) { - /**********************************************************************/ - /* Open the connection with the JDBC data source. */ - /**********************************************************************/ - jcp = new(g)JDBConn(g, NULL); - - if (jcp->Open(dsn, 2) < 1) // 2 is openReadOnly - return NULL; - - } // endif op - - /************************************************************************/ - /* Do an evaluation of the result size. */ - /************************************************************************/ - n = jcp->GetMaxValue(SQL_MAX_COLUMNS_IN_TABLE); - maxres = (n) ? (int)n : 250; - n = jcp->GetMaxValue(SQL_MAX_CATALOG_NAME_LEN); - length[0] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_SCHEMA_NAME_LEN); - length[1] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_TABLE_NAME_LEN); - length[2] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_COLUMN_NAME_LEN); - length[3] = (n) ? (n + 1) : 128; - - if (trace) - htrc("JDBCPrimaryKeys: max=%d len=%d,%d,%d\n", - maxres, length[0], length[1], length[2]); - - /************************************************************************/ - /* Allocate the structure used to refer to the result set. 
*/ - /************************************************************************/ - qrp = PlgAllocResult(g, ncol, maxres, IDS_PKEY, - buftyp, NULL, length, false, true); - - if (trace) - htrc("Getting pkey results ncol=%d\n", qrp->Nbcol); - - cap = AllocCatInfo(g, CAT_KEY, NULL, table, qrp); - - /************************************************************************/ - /* Now get the results into blocks. */ - /************************************************************************/ - if ((n = jcp->GetCatInfo(cap)) >= 0) { - qrp->Nblin = n; - // ResetNullValues(cap); - - if (trace) - htrc("PrimaryKeys: NBCOL=%d NBLIN=%d\n", qrp->Nbcol, qrp->Nblin); - - } else - qrp = NULL; - - /************************************************************************/ - /* Close any local connection. */ - /************************************************************************/ - if (!op) - jcp->Close(); - - /************************************************************************/ - /* Return the result pointer for use by GetData routines. */ - /************************************************************************/ - return qrp; -} // end of JDBCPrimaryKeys -#endif // 0 - /***********************************************************************/ /* JDBConn construction/destruction. */ /***********************************************************************/ @@ -739,60 +600,6 @@ bool JDBConn::gmID(PGLOBAL g, jmethodID& mid, const char *name, const char *sig } // end of gmID -#if 0 -/***********************************************************************/ -/* Utility routine. 
*/ -/***********************************************************************/ -PSZ JDBConn::GetStringInfo(ushort infotype) -{ - //ASSERT(m_hdbc != SQL_NULL_HDBC); - char *p, buffer[MAX_STRING_INFO]; - SWORD result; - RETCODE rc; - - rc = SQLGetInfo(m_hdbc, infotype, buffer, sizeof(buffer), &result); - - if (!Check(rc)) { - ThrowDJX(rc, "SQLGetInfo"); // Temporary - // *buffer = '\0'; - } // endif rc - - p = PlugDup(m_G, buffer); - return p; -} // end of GetStringInfo - -/***********************************************************************/ -/* Utility routines. */ -/***********************************************************************/ -void JDBConn::OnSetOptions(HSTMT hstmt) -{ - RETCODE rc; - ASSERT(m_hdbc != SQL_NULL_HDBC); - - if ((signed)m_QueryTimeout != -1) { - // Attempt to set query timeout. Ignore failure - rc = SQLSetStmtOption(hstmt, SQL_QUERY_TIMEOUT, m_QueryTimeout); - - if (!Check(rc)) - // don't attempt it again - m_QueryTimeout = (DWORD)-1; - - } // endif m_QueryTimeout - - if (m_RowsetSize > 0) { - // Attempt to set rowset size. - // In case of failure reset it to 0 to use Fetch. - rc = SQLSetStmtOption(hstmt, SQL_ROWSET_SIZE, m_RowsetSize); - - if (!Check(rc)) - // don't attempt it again - m_RowsetSize = 0; - - } // endif m_RowsetSize - -} // end of OnSetOptions -#endif // 0 - /***********************************************************************/ /* Utility routine. 
*/ /***********************************************************************/ @@ -1007,7 +814,7 @@ int JDBConn::Open(PJPARM sop) #define N 1 #endif - // Java source will be compiled as ajar file installed in the plugin dir + // Java source will be compiled as a jar file installed in the plugin dir jpop->Append(sep); jpop->Append(GetPluginDir()); jpop->Append("JdbcInterface.jar"); diff --git a/storage/connect/json.cpp b/storage/connect/json.cpp index 75bf277b25b..c45630129f1 100644 --- a/storage/connect/json.cpp +++ b/storage/connect/json.cpp @@ -595,7 +595,7 @@ PSZ Serialize(PGLOBAL g, PJSON jsp, char *fn, int pretty) fputs(EL, fs); fclose(fs); str = (err) ? NULL : strcpy(g->Message, "Ok"); - } else if (!err) { + } else if (!err) { str = ((JOUTSTR*)jp)->Strp; jp->WriteChr('\0'); PlugSubAlloc(g, NULL, ((JOUTSTR*)jp)->N); @@ -767,7 +767,7 @@ bool JOUTSTR::Escape(const char *s) { WriteChr('"'); - for (unsigned int i = 0; i < strlen(s); i++) + for (unsigned int i = 0; s[i]; i++) switch (s[i]) { case '"': case '\\': @@ -816,7 +816,7 @@ bool JOUTFILE::Escape(const char *s) // This is temporary fputc('"', Stream); - for (unsigned int i = 0; i < strlen(s); i++) + for (unsigned int i = 0; s[i]; i++) switch (s[i]) { case '"': fputs("\\\"", Stream); break; case '\\': fputs("\\\\", Stream); break; diff --git a/storage/connect/mycat.cc b/storage/connect/mycat.cc index da8be207237..b4b03e6ba4a 100644 --- a/storage/connect/mycat.cc +++ b/storage/connect/mycat.cc @@ -109,19 +109,7 @@ PQRYRES OEMColumns(PGLOBAL g, PTOS topt, char *tab, char *db, bool info); /***********************************************************************/ char *GetPluginDir(void) { - char *plugin_dir; - -#if defined(_WIN64) - plugin_dir = (char *)GetProcAddress(GetModuleHandle(NULL), - "?opt_plugin_dir@@3PADEA"); -#elif defined(_WIN32) - plugin_dir = (char*)GetProcAddress(GetModuleHandle(NULL), - "?opt_plugin_dir@@3PADA"); -#else - plugin_dir = opt_plugin_dir; -#endif - - return plugin_dir; + return 
opt_plugin_dir; } // end of GetPluginDir /***********************************************************************/ diff --git a/storage/connect/reldef.cpp b/storage/connect/reldef.cpp index 2c8ada52e6f..8ad6e203d51 100644 --- a/storage/connect/reldef.cpp +++ b/storage/connect/reldef.cpp @@ -294,7 +294,7 @@ int TABDEF::GetColCatInfo(PGLOBAL g) nlg+= nof; case TAB_DIR: case TAB_XML: - poff= loff + 1; + poff= loff + (pcf->Flags & U_VIRTUAL ? 0 : 1); break; case TAB_INI: case TAB_MAC: From 6c74ef8ae9cdfedb20827694362cad0fccaa5880 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 7 Sep 2016 09:30:02 +1000 Subject: [PATCH 29/96] MDEV-10707: Fix tokudb test rows-32m-rand-insert (#231) MDEV-10757: Fix tokudb test rows-32m-rand-insert --- storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result index 5c1c53946a4..b287c70469e 100644 --- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result +++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result @@ -1009,6 +1009,7 @@ Table Op Msg_type Msg_text test.t check status OK optimize table t; Table Op Msg_type Msg_text +test.t optimize note Table does not support optimize, doing recreate + analyze instead test.t optimize status OK check table t; Table Op Msg_type Msg_text From 577f3c1dce2011f51a01811fabd4ebd4e6f4d1ed Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 10 Sep 2016 17:50:32 +0200 Subject: [PATCH 30/96] Fix use of `require` in mysql-test-run. The motivation for this is that Perl is moving towards not having current directory ./ in @INC by default. This is causing mysql-test-run.pl to fail in latest Debian Unstable: https://lists.debian.org/debian-devel-announce/2016/08/msg00013.html However, we have `use "lib"`, there is no need for current directory in @INC, except for a gross hack. 
In mtr_cases.pm, there is a `require "mtr_misc.pl"`, which hides mtr_misc.pl away in mtr_cases namespace. And things only work because mysql-test-run.pl loads it with a different name, `require "lib/mtr_misc.pl"`! (Perl will `require` only once for each unique filename). Fix this by only using `require` in main program, and referencing functions with :: scope from other namespaces. For multi-use in different namespaces, proper `use` modules should be used. Signed-off-by: Kristian Nielsen --- mysql-test/lib/mtr_cases.pm | 4 +--- mysql-test/lib/mtr_report.pm | 3 +-- mysql-test/mysql-test-run.pl | 10 +++++----- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm index 10e5fd5c337..38c52b705f6 100644 --- a/mysql-test/lib/mtr_cases.pm +++ b/mysql-test/lib/mtr_cases.pm @@ -60,8 +60,6 @@ use My::Test; use My::Find; use My::Suite; -require "mtr_misc.pl"; - # locate plugin suites, depending on whether it's a build tree or installed my @plugin_suitedirs; my $plugin_suitedir_regex; @@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) { $file_to_tags{$file}= $tags; $file_to_master_opts{$file}= $master_opts; $file_to_slave_opts{$file}= $slave_opts; - $file_combinations{$file}= [ uniq(@combinations) ]; + $file_combinations{$file}= [ ::uniq(@combinations) ]; $file_in_overlay{$file} = 1 if $in_overlay; return @{$tags}; } diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm index 9ab82c454ed..97ace54f0fb 100644 --- a/mysql-test/lib/mtr_report.pm +++ b/mysql-test/lib/mtr_report.pm @@ -34,7 +34,6 @@ use mtr_match; use My::Platform; use POSIX qw[ _exit ]; use IO::Handle qw[ flush ]; -require "mtr_io.pl"; use mtr_results; my $tot_real_time= 0; @@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) { my $timer_str= ""; if ( $timer and -f "$::opt_vardir/log/timer" ) { - $timer_str= mtr_fromfile("$::opt_vardir/log/timer"); + $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer"); $tinfo->{timer}= $timer_str; 
resfile_test_info('duration', $timer_str) if $::opt_resfile; } diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index f3b733a1eac..9bfea2577c6 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -102,11 +102,11 @@ use mtr_results; use IO::Socket::INET; use IO::Select; -require "lib/mtr_process.pl"; -require "lib/mtr_io.pl"; -require "lib/mtr_gcov.pl"; -require "lib/mtr_gprof.pl"; -require "lib/mtr_misc.pl"; +require "mtr_process.pl"; +require "mtr_io.pl"; +require "mtr_gcov.pl"; +require "mtr_gprof.pl"; +require "mtr_misc.pl"; $SIG{INT}= sub { mtr_error("Got ^C signal"); }; $SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); }; From af3dc4825b09bf3de3aa092b480fff6514d1e0f8 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 10 Sep 2016 20:42:20 +0200 Subject: [PATCH 31/96] Attempt to fix strange rpm dependency issue following prior patch --- cmake/cpack_rpm.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake index fa7c563e57d..d684761187f 100644 --- a/cmake/cpack_rpm.cmake +++ b/cmake/cpack_rpm.cmake @@ -220,6 +220,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES "perl(mtr_io.pl)" "perl(mtr_match)" "perl(mtr_misc.pl)" + "perl(mtr_gcov.pl)" + "perl(mtr_gprof.pl)" + "perl(mtr_process.pl)" "perl(mtr_report)" "perl(mtr_results)" "perl(mtr_unique)") From b34d7fba31c4b18f12d400c247a09bce0ca635be Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 11 Sep 2016 11:18:27 +0200 Subject: [PATCH 32/96] Debian bug#837369 - test failures on hppa ENOTEMPTY is 247 on hppa, not 39 like on Linux, according to this report: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=837369 So add another replacement for this to rpl.rpl_drop_db and binlog.binlog_databasae tests (there were already a couple similar replacements for other platforms). 
Signed-off-by: Kristian Nielsen --- mysql-test/extra/binlog_tests/database.test | 2 +- mysql-test/suite/rpl/t/rpl_drop_db.test | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test index 82e8b396357..f111a028642 100644 --- a/mysql-test/extra/binlog_tests/database.test +++ b/mysql-test/extra/binlog_tests/database.test @@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix'; # Use '/' instead of '\' in the error message. On windows platform, dir is # formed with '\'. ---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/ +--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/ --error 1010 DROP DATABASE testing_1; let $wait_binlog_event= DROP TABLE IF EXIST; diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test index a67850a66dd..dae1651dc93 100644 --- a/mysql-test/suite/rpl/t/rpl_drop_db.test +++ b/mysql-test/suite/rpl/t/rpl_drop_db.test @@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1); select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt'; create table mysqltest1.t2 (n int); create table mysqltest1.t3 (n int); ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; @@ -30,7 +30,7 @@ while ($1) } --enable_query_log ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; From a2290919533df16afdfdbd0679f80734b5a36109 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 11 Sep 2016 20:52:00 +0200 Subject: [PATCH 33/96] potential signedness issue different fix for 07a33cdcef: Bug #23296299 : 
HANDLE_FATAL_SIGNAL (SIG=11) IN MY_TOSORT_UTF32 --- mysql-test/r/ctype_utf32.result | 3 +++ mysql-test/t/ctype_utf32.test | 5 +++++ strings/ctype-ucs2.c | 14 +++++++------- strings/ctype-utf8.c | 8 ++++---- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 1f316b7b68f..ae55f2c101e 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -1269,6 +1269,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)) SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)) 1 +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; +c +FFFF0000 # # End of 5.5 tests # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 1be8925873c..8cbb8e2e55e 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -876,6 +876,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); +# +# potential signedness issue +# +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; + --echo # --echo # End of 5.5 tests --echo # diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index a79f5899ec5..ca6f53f3f8d 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1,5 +1,5 @@ /* Copyright (c) 2003, 2013, Oracle and/or its affiliates - Copyright (c) 2009, 2014, SkySQL Ab. 
+ Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -1098,7 +1098,7 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), static inline void my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -1107,7 +1107,7 @@ my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) static inline void my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } @@ -1116,7 +1116,7 @@ my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) static inline void my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256) { if (uni_plane[page]) @@ -1727,7 +1727,7 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)), static inline void my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -1736,7 +1736,7 @@ my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) static inline void my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } @@ -1745,7 +1745,7 @@ my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) static inline void my_tosort_utf32(MY_UNICASE_INFO *const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256) { if (uni_plane[page]) diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 2dd7f5e6b92..f2782657bea 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ 
-1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2013, Monty Program Ab + Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -1939,7 +1939,7 @@ MY_UNICASE_INFO *const my_unicase_turkish[256]= static inline void my_tosort_unicode(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256) { if (uni_plane[page]) @@ -5024,7 +5024,7 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)), static inline void my_tolower_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].tolower; } @@ -5033,7 +5033,7 @@ my_tolower_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) static inline void my_toupper_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc) { - int page= *wc >> 8; + uint page= *wc >> 8; if (page < 256 && uni_plane[page]) *wc= uni_plane[page][*wc & 0xFF].toupper; } From 611dc0dcf4b39c670daf229f10e43b8b33f6e8c3 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 11 Sep 2016 20:53:16 +0200 Subject: [PATCH 34/96] missing element in prelocked_mode_name[] array different fix for a63a250d40: BUG#23509275 :DBUG_PRINT in THD::decide_logging_format prints incorrectly, access out-of-bound --- sql/sql_class.cc | 2 ++ sql/sql_class.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 05a8ee8091c..62339b2690a 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -4736,9 +4736,11 @@ int THD::decide_logging_format(TABLE_LIST *tables) { static const char *prelocked_mode_name[] = { "NON_PRELOCKED", + "LOCK_TABLES", "PRELOCKED", "PRELOCKED_UNDER_LOCK_TABLES", }; + compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last); DBUG_PRINT("debug", 
("prelocked_mode: %s", prelocked_mode_name[locked_tables_mode])); } diff --git a/sql/sql_class.h b/sql/sql_class.h index da83382d5e9..27bc40e3761 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1075,7 +1075,8 @@ enum enum_locked_tables_mode LTM_NONE= 0, LTM_LOCK_TABLES, LTM_PRELOCKED, - LTM_PRELOCKED_UNDER_LOCK_TABLES + LTM_PRELOCKED_UNDER_LOCK_TABLES, + LTM_always_last }; From 347eeefbfc658c8531878218487d729f4e020805 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 11 Sep 2016 20:55:11 +0200 Subject: [PATCH 35/96] don't use my_copystat in the server it was supposed to be used in command-line tools only. Different fix for 4e5473862e: Bug#24388746: PRIVILEGE ESCALATION AND RACE CONDITION USING CREATE TABLE --- include/my_sys.h | 4 ++-- mysys/my_redel.c | 7 ++++--- mysys/my_static.c | 1 + sql/mysqld.cc | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 7e37fe598bd..001769a0b76 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2010, 2013, Monty Program Ab. + Copyright (c) 2010, 2016, Monty Program Ab. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -246,7 +246,7 @@ extern my_bool my_use_symdir; extern ulong my_default_record_cache_size; extern my_bool my_disable_locking, my_disable_async_io, my_disable_flush_key_blocks, my_disable_symlinks; -extern my_bool my_disable_sync; +extern my_bool my_disable_sync, my_disable_copystat_in_redel; extern char wild_many,wild_one,wild_prefix; extern const char *charsets_dir; /* from default.c */ diff --git a/mysys/my_redel.c b/mysys/my_redel.c index b285bb25e2e..e5e4f48d9d5 100644 --- a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -1,5 +1,5 @@ -/* - Copyright (c) 2000, 2010, Oracle and/or its affiliates +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates + Copyright (c) 2009, 2016, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name, DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %d", org_name,tmp_name,MyFlags)); - if (my_copystat(org_name,tmp_name,MyFlags) < 0) + if (!my_disable_copystat_in_redel && + my_copystat(org_name,tmp_name,MyFlags) < 0) goto end; if (MyFlags & MY_REDEL_MAKE_BACKUP) { diff --git a/mysys/my_static.c b/mysys/my_static.c index fdc01b1248b..48b1e5b8dd9 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -99,6 +99,7 @@ my_bool my_disable_sync=0; my_bool my_disable_async_io=0; my_bool my_disable_flush_key_blocks=0; my_bool my_disable_symlinks=0; +my_bool my_disable_copystat_in_redel=0; /* Note that PSI_hook and PSI_server are unconditionally diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9b8f964629d..be9e21d6746 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3455,6 +3455,7 @@ static int init_common_variables() max_system_variables.pseudo_thread_id= (ulong)~0; server_start_time= flush_status_time= my_time(0); + 
my_disable_copystat_in_redel= 1; rpl_filter= new Rpl_filter; binlog_filter= new Rpl_filter; From 0da39caceea7733a94d898427d63ba2670160af4 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 12 Sep 2016 16:18:07 +0200 Subject: [PATCH 36/96] fix BIGINT+MEDIUMINT type aggregation --- mysql-test/r/type_uint.result | 19 +++++++++++++++++++ mysql-test/t/type_uint.test | 8 ++++++++ sql/field.cc | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result index e08605fb237..d67c735f067 100644 --- a/mysql-test/r/type_uint.result +++ b/mysql-test/r/type_uint.result @@ -14,3 +14,22 @@ this 0 4294967295 drop table t1; +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +coalesce(a,b) coalesce(b,a) +1 2 +18446744073709551615 16777215 +create table t2 as select a from t1 union select b from t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `a` bigint(20) unsigned DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t2; +a +1 +18446744073709551615 +2 +16777215 +drop table t1, t2; diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test index a9212183cb6..14e5e3bd621 100644 --- a/mysql-test/t/type_uint.test +++ b/mysql-test/t/type_uint.test @@ -15,3 +15,11 @@ select * from t1; drop table t1; # End of 4.1 tests + +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +create table t2 as select a from t1 union select b from t1; +show create table t2; +select * from t2; +drop table t1, t2; diff --git a/sql/field.cc b/sql/field.cc index a0686fb2f19..878e3d305af 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -359,7 +359,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP 
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR From 6e02d426d5f3970f4a92e2501f410468faa6ef2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 13 Sep 2016 13:16:11 +0200 Subject: [PATCH 37/96] Fix compilation failure of TokuDB on BSD-like systems mincore is defined differently in BSD mincore(void *, size_t, char *) vs linux variant of: mincore(void *, size_t, unsigned char *). Account for this difference in TokuDB. --- storage/tokudb/PerconaFT/portability/huge_page_detection.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc index bc48e93937d..8e73c56a6c5 100644 --- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc +++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc @@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void) const long pagesize = 4096; const long n_pages = TWO_MB/pagesize; +#ifdef __linux__ + // On linux mincore is defined as mincore(void *, size_t, unsigned char *) unsigned char vec[n_pages]; +#else + // On BSD (OS X included) it is defined as mincore(void *, size_t, char *) + char vec[n_pages]; +#endif { int r = mincore(second, TWO_MB, vec); if (r!=0 && errno==ENOMEM) { From b3f7a8019dae01ed03353856f62543248e6f9cd9 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Tue, 13 Sep 2016 11:12:54 -0400 Subject: [PATCH 38/96] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index acabf9b42d0..d44c8b28006 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=52 +MYSQL_VERSION_PATCH=53 MYSQL_VERSION_EXTRA= From 
7d596c9ff526bc912769490023c44e9a5b2fa743 Mon Sep 17 00:00:00 2001 From: Olivier Bertrand Date: Fri, 16 Sep 2016 22:14:14 +0200 Subject: [PATCH 39/96] - Working on MDEV-10525. Lrecl mismatch on DBF files modified: storage/connect/filamdbf.cpp modified: storage/connect/filamdbf.h modified: storage/connect/reldef.cpp --- storage/connect/filamdbf.cpp | 86 ++++++++++++++++++++++++++++-------- storage/connect/filamdbf.h | 2 +- storage/connect/reldef.cpp | 6 ++- 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/storage/connect/filamdbf.cpp b/storage/connect/filamdbf.cpp index 8afda723578..a4557facbd8 100644 --- a/storage/connect/filamdbf.cpp +++ b/storage/connect/filamdbf.cpp @@ -383,7 +383,7 @@ DBFBASE::DBFBASE(DBFBASE *txfp) /* and header length. Set Records, check that Reclen is equal to lrecl and */ /* return the header length or 0 in case of error. */ /****************************************************************************/ -int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) +int DBFBASE::ScanHeader(PGLOBAL g, PSZ fn, int lrecl, int *rln, char *defpath) { int rc; char filename[_MAX_PATH]; @@ -393,7 +393,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) /************************************************************************/ /* Open the input file. 
*/ /************************************************************************/ - PlugSetPath(filename, fname, defpath); + PlugSetPath(filename, fn, defpath); if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "rb"))) return 0; // Assume file does not exist @@ -410,11 +410,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) } else if (rc == RC_FX) return -1; - if ((int)header.Reclen() != lrecl) { - sprintf(g->Message, MSG(BAD_LRECL), lrecl, header.Reclen()); - return -1; - } // endif Lrecl - + *rln = (int)header.Reclen(); Records = (int)header.Records(); return (int)header.Headlen(); } // end of ScanHeader @@ -431,9 +427,27 @@ int DBFFAM::Cardinality(PGLOBAL g) if (!g) return 1; - if (!Headlen) - if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0) - return -1; // Error in ScanHeader + if (!Headlen) { + int rln = 0; // Record length in the file header + + Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath()); + + if (Headlen < 0) + return -1; // Error in ScanHeader + + if (rln && Lrecl != rln) { + // This happens always on some Linux platforms + sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln); + + if (Accept) { + Lrecl = rln; + PushWarning(g, Tdbp); + } else + return -1; + + } // endif rln + + } // endif Headlen // Set number of blocks for later use Block = (Records > 0) ? 
(Records + Nrec - 1) / Nrec : 0; @@ -565,7 +579,13 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g) if (Lrecl != reclen) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, reclen); - return true; + + if (Accept) { + Lrecl = reclen; + PushWarning(g, Tdbp); + } else + return true; + } // endif Lrecl hlen = HEADLEN * (n + 1) + 2; @@ -641,8 +661,14 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g) if ((rc = dbfhead(g, Stream, Tdbp->GetFile(g), &header)) == RC_OK) { if (Lrecl != (int)header.Reclen()) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, header.Reclen()); - return true; - } // endif Lrecl + + if (Accept) { + Lrecl = header.Reclen(); + PushWarning(g, Tdbp); + } else + return true; + + } // endif Lrecl Records = (int)header.Records(); Headlen = (int)header.Headlen(); @@ -916,9 +942,27 @@ int DBMFAM::Cardinality(PGLOBAL g) if (!g) return 1; - if (!Headlen) - if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0) - return -1; // Error in ScanHeader + if (!Headlen) { + int rln = 0; // Record length in the file header + + Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath()); + + if (Headlen < 0) + return -1; // Error in ScanHeader + + if (rln && Lrecl != rln) { + // This happens always on some Linux platforms + sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln); + + if (Accept) { + Lrecl = rln; + PushWarning(g, Tdbp); + } else + return -1; + + } // endif rln + + } // endif Headlen // Set number of blocks for later use Block = (Records > 0) ? 
(Records + Nrec - 1) / Nrec : 0; @@ -961,8 +1005,14 @@ bool DBMFAM::AllocateBuffer(PGLOBAL g) if (Lrecl != (int)hp->Reclen()) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, hp->Reclen()); - return true; - } // endif Lrecl + + if (Accept) { + Lrecl = hp->Reclen(); + PushWarning(g, Tdbp); + } else + return true; + + } // endif Lrecl Records = (int)hp->Records(); Headlen = (int)hp->Headlen(); diff --git a/storage/connect/filamdbf.h b/storage/connect/filamdbf.h index da84d7685a8..66458a10eaa 100644 --- a/storage/connect/filamdbf.h +++ b/storage/connect/filamdbf.h @@ -31,7 +31,7 @@ class DllExport DBFBASE { DBFBASE(PDBF txfp); // Implementation - int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath); + int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, int *rlen, char *defpath); protected: // Default constructor, not to be used diff --git a/storage/connect/reldef.cpp b/storage/connect/reldef.cpp index 8ad6e203d51..ac2327212e0 100644 --- a/storage/connect/reldef.cpp +++ b/storage/connect/reldef.cpp @@ -440,7 +440,11 @@ int TABDEF::GetColCatInfo(PGLOBAL g) } // endswitch tc // lrecl must be at least recln to avoid buffer overflow - recln= MY_MAX(recln, Hc->GetIntegerOption("Lrecl")); + if (trace) + htrc("Lrecl: Calculated=%d defined=%d\n", + recln, Hc->GetIntegerOption("Lrecl")); + + recln = MY_MAX(recln, Hc->GetIntegerOption("Lrecl")); Hc->SetIntegerOption("Lrecl", recln); ((PDOSDEF)this)->SetLrecl(recln); } // endif Lrecl From fd0c114c5dbd506b7bb795dd3674b942e90e7458 Mon Sep 17 00:00:00 2001 From: iangilfillan Date: Mon, 12 Sep 2016 14:57:32 +0200 Subject: [PATCH 40/96] Update contributors --- CREDITS | 1 + mysql-test/r/contributors.result | 1 + sql/contributors.h | 1 + 3 files changed, 3 insertions(+) diff --git a/CREDITS b/CREDITS index f0e6de7f08f..35ab4d48a8f 100644 --- a/CREDITS +++ b/CREDITS @@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016) Acronis http://acronis.com (2016) Nexedi https://www.nexedi.com (2016) Automattic https://automattic.com (2014 - 
2016) +Tencent Game DBA http://tencentdba.com/about (2016) Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016) Virtuozzo https://virtuozzo.com (2016) diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result index 918ceaa496f..f3f5e227d3a 100644 --- a/mysql-test/r/contributors.result +++ b/mysql-test/r/contributors.result @@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation +Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation Google USA Sponsoring encryption, parallel replication and GTID Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction diff --git a/sql/contributors.h b/sql/contributors.h index f52d3243453..0359ec54022 100644 --- a/sql/contributors.h +++ b/sql/contributors.h @@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= { {"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"}, {"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"}, {"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"}, + {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"}, /* Sponsors of important features */ {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"}, From 83d5b963bd38e327a673c5d4f4d70c8223f45dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Mon, 19 Sep 2016 17:15:18 +0200 Subject: [PATCH 41/96] Fix tokudb jemalloc linking Linking tokudb with jemalloc privately causes problems on library load/unload. 
To prevent dangling destructor pointers, link with the same library as the server is using. --- storage/tokudb/PerconaFT/portability/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt index 9f84d9b03df..4793db63cc1 100644 --- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt @@ -14,12 +14,11 @@ set(tokuportability_srcs ) add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs}) -target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC}) target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) add_library(tokuportability_static_conv STATIC ${tokuportability_srcs}) set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON) -set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) +set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}") maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv) From e56a53920b0075f9a534610032ee05f2e249e3ae Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Fri, 1 Jul 2016 13:57:18 +0400 Subject: [PATCH 42/96] MDEV-10315 - Online ALTER TABLE may get stuck in tdc_remove_table There was race condition between online ALTER TABLE and statements performing TABLE_SHARE release without marking it flushed (e.g. in case of table cache overflow, SET @@global.table_open_cache, manager thread purging table cache). The reason was missing mysql_cond_broadcast(). 
--- sql/table_cache.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/table_cache.cc b/sql/table_cache.cc index 097f37d26d8..bdb7914c32b 100644 --- a/sql/table_cache.cc +++ b/sql/table_cache.cc @@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share) } if (--share->tdc.ref_count) { + if (!share->is_view) + mysql_cond_broadcast(&share->tdc.COND_release); mysql_mutex_unlock(&share->tdc.LOCK_table_share); mysql_mutex_unlock(&LOCK_unused_shares); DBUG_VOID_RETURN; From 677c44f0c37973ad70550d9b807781e688764fae Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 23 Sep 2016 20:27:58 +0200 Subject: [PATCH 43/96] MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash when opening a system table for a SELECT-like read, pretend (for the sake of engines) it's SQLCOM_SELECT --- mysql-test/suite/innodb/r/system_tables.result | 8 ++++++++ mysql-test/suite/innodb/t/system_tables.test | 12 ++++++++++++ sql/sql_base.cc | 1 + 3 files changed, 21 insertions(+) create mode 100644 mysql-test/suite/innodb/r/system_tables.result create mode 100644 mysql-test/suite/innodb/t/system_tables.test diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result new file mode 100644 index 00000000000..79a24f7e455 --- /dev/null +++ b/mysql-test/suite/innodb/r/system_tables.result @@ -0,0 +1,8 @@ +alter table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +starttime +2008-08-12 02:43:00 +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test new file mode 100644 index 00000000000..90cb8c59fbd --- /dev/null +++ b/mysql-test/suite/innodb/t/system_tables.test @@ -0,0 +1,12 @@ +--source include/have_innodb.inc + +# +# 
MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash +# +alter table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +--source include/restart_mysqld.inc +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 6ec94016366..b9cc4e5d69a 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -9223,6 +9223,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list, */ lex->reset_n_backup_query_tables_list(&query_tables_list_backup); thd->reset_n_backup_open_tables_state(backup); + thd->lex->sql_command= SQLCOM_SELECT; if (open_and_lock_tables(thd, table_list, FALSE, MYSQL_OPEN_IGNORE_FLUSH | From 943443137460cab6499c032f0e5a03256daa9571 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 24 Sep 2016 13:50:42 +0200 Subject: [PATCH 44/96] Fix free() after my_malloc() (should be my_free()). 
Signed-off-by: Kristian Nielsen --- tests/async_queries.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/async_queries.c b/tests/async_queries.c index 75229eec4b4..c91edf5bb3b 100644 --- a/tests/async_queries.c +++ b/tests/async_queries.c @@ -425,7 +425,7 @@ main(int argc, char *argv[]) event_dispatch(); - free(sds); + my_free(sds); mysql_library_end(); From f620da194befe4506679ad7f1d0725796fad7de0 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 24 Sep 2016 01:17:35 +0200 Subject: [PATCH 45/96] MDEV-10725 Server 10.1.17 fails to build using clang with c++11 my_offsetof() returns a difference of two pointers, it must use the appropriate return type (my_ptrdiff_t, not size_t) --- include/my_global.h | 3 +-- sql/parse_file.h | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/my_global.h b/include/my_global.h index dce38a124c1..0c15478439f 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -857,8 +857,7 @@ typedef long long my_ptrdiff_t; and related routines are refactored. 
*/ -#define my_offsetof(TYPE, MEMBER) \ - ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10)) +#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10) #define NullS (char *) 0 diff --git a/sql/parse_file.h b/sql/parse_file.h index 2a0266e98b7..83a8eabcf5f 100644 --- a/sql/parse_file.h +++ b/sql/parse_file.h @@ -42,9 +42,9 @@ enum file_opt_type { struct File_option { - LEX_STRING name; /**< Name of the option */ - int offset; /**< offset to base address of value */ - file_opt_type type; /**< Option type */ + LEX_STRING name; /**< Name of the option */ + my_ptrdiff_t offset; /**< offset to base address of value */ + file_opt_type type; /**< Option type */ }; From 8483659f4f017285a878ce563c1c756ee3e4d3dc Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 24 Sep 2016 10:06:58 +0200 Subject: [PATCH 46/96] report correct write error on log writes --- sql/log.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/log.cc b/sql/log.cc index da45a844bb3..bb8f06c80f7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2863,7 +2863,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time, if (! write_error) { write_error= 1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, error); + sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno); } } } From c91fdb66dbd26d832073e7b99075bfd0b5b9da11 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 26 Sep 2016 13:03:02 +0200 Subject: [PATCH 47/96] Windows , mtr : allow cdb to print core dumps also if --parallel > 1 --- mysql-test/lib/My/CoreDump.pm | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm index 0e90967ef95..f9f7b3d8d4b 100644 --- a/mysql-test/lib/My/CoreDump.pm +++ b/mysql-test/lib/My/CoreDump.pm @@ -261,11 +261,7 @@ sub show { # On Windows, rely on cdb to be there... 
if (IS_WINDOWS) { - # Starting cdb is unsafe when used with --parallel > 1 option - if ( $parallel < 2 ) - { - _cdb($core_name); - } + _cdb($core_name); return; } From d61e5260fb9983ea8dff539b23a6d0a150c2065c Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 26 Sep 2016 17:48:08 +0200 Subject: [PATCH 48/96] MDEV-10441 Document the server_audit_loc_info variable fix PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT plugin thdvars to work. use that for server_audit_loc_info --- .../suite/plugins/r/server_audit.result | 3 - .../plugins/r/thread_pool_server_audit.result | 3 - plugin/server_audit/server_audit.c | 5 +- sql/sql_plugin.cc | 173 ++++++++---------- 4 files changed, 80 insertions(+), 104 deletions(-) diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 2dcfa107103..c807107534d 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result index 2dcfa107103..c807107534d 100644 --- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result +++ 
b/mysql-test/suite/plugins/r/thread_pool_server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index 30b7cdb5dcb..95150c82f25 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -429,9 +429,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit, char locinfo_ini_value[sizeof(struct connection_info)+4]; static MYSQL_THDVAR_STR(loc_info, - PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC, - "Auxiliary info.", NULL, NULL, - locinfo_ini_value); + PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC, + "Internal info", NULL, NULL, locinfo_ini_value); static const char *syslog_facility_names[]= { diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index c8c8c8ba324..2ec67a89746 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -2756,6 +2756,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, } +static size_t var_storage_size(int flags) +{ + switch (flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: return sizeof(my_bool); + case PLUGIN_VAR_INT: return sizeof(int); + case PLUGIN_VAR_LONG: return sizeof(long); + case PLUGIN_VAR_ENUM: return sizeof(long); + case PLUGIN_VAR_LONGLONG: 
return sizeof(ulonglong); + case PLUGIN_VAR_SET: return sizeof(ulonglong); + case PLUGIN_VAR_STR: return sizeof(char*); + case PLUGIN_VAR_DOUBLE: return sizeof(double); + default: DBUG_ASSERT(0); return 0; + } +} + + /* returns a bookmark for thd-local variables, creating if neccessary. returns null for non thd-local variables. @@ -2764,39 +2780,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, static st_bookmark *register_var(const char *plugin, const char *name, int flags) { - uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size; + uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size; st_bookmark *result; char *varname, *p; - if (!(flags & PLUGIN_VAR_THDLOCAL)) - return NULL; - - switch (flags & PLUGIN_VAR_TYPEMASK) { - case PLUGIN_VAR_BOOL: - size= sizeof(my_bool); - break; - case PLUGIN_VAR_INT: - size= sizeof(int); - break; - case PLUGIN_VAR_LONG: - case PLUGIN_VAR_ENUM: - size= sizeof(long); - break; - case PLUGIN_VAR_LONGLONG: - case PLUGIN_VAR_SET: - size= sizeof(ulonglong); - break; - case PLUGIN_VAR_STR: - size= sizeof(char*); - break; - case PLUGIN_VAR_DOUBLE: - size= sizeof(double); - break; - default: - DBUG_ASSERT(0); - return NULL; - }; + DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL); + size= var_storage_size(flags); varname= ((char*) my_alloca(length)); strxmov(varname + 1, plugin, "_", name, NullS); for (p= varname + 1; *p; p++) @@ -3005,25 +2995,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock) */ for (idx= 0; idx < bookmark_hash.records; idx++) { - sys_var_pluginvar *pi; - sys_var *var; st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx); if (v->version <= thd->variables.dynamic_variables_version) continue; /* already in thd->variables */ - if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) || - !(pi= var->cast_pluginvar()) || - v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags)) - continue; - /* Here we do anything special that may be 
required of the data types */ - if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && - pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC) + if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + v->key[0] & BOOKMARK_MEMALLOC) { - int offset= ((thdvar_str_t *)(pi->plugin_var))->offset; - char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset); + char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset); if (*pp) *pp= my_strdup(*pp, MYF(MY_WME|MY_FAE)); } @@ -3284,6 +3266,48 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var) return false; } +static const void *var_def_ptr(st_mysql_sys_var *pv) +{ + switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { + case PLUGIN_VAR_INT: + return &((sysvar_uint_t*) pv)->def_val; + case PLUGIN_VAR_LONG: + return &((sysvar_ulong_t*) pv)->def_val; + case PLUGIN_VAR_LONGLONG: + return &((sysvar_ulonglong_t*) pv)->def_val; + case PLUGIN_VAR_ENUM: + return &((sysvar_enum_t*) pv)->def_val; + case PLUGIN_VAR_SET: + return &((sysvar_set_t*) pv)->def_val; + case PLUGIN_VAR_BOOL: + return &((sysvar_bool_t*) pv)->def_val; + case PLUGIN_VAR_STR: + return &((sysvar_str_t*) pv)->def_val; + case PLUGIN_VAR_DOUBLE: + return &((sysvar_double_t*) pv)->def_val; + case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: + return &((thdvar_uint_t*) pv)->def_val; + case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: + return &((thdvar_ulong_t*) pv)->def_val; + case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: + return &((thdvar_ulonglong_t*) pv)->def_val; + case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: + return &((thdvar_enum_t*) pv)->def_val; + case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: + return &((thdvar_set_t*) pv)->def_val; + case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: + return &((thdvar_bool_t*) pv)->def_val; + case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: + return &((thdvar_str_t*) pv)->def_val; + case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: + return &((thdvar_double_t*) pv)->def_val; + default: + 
DBUG_ASSERT(0); + return NULL; + } +} + + bool sys_var_pluginvar::global_update(THD *thd, set_var *var) { DBUG_ASSERT(!is_readonly()); @@ -3293,60 +3317,7 @@ bool sys_var_pluginvar::global_update(THD *thd, set_var *var) const void *src= &var->save_result; if (!var->value) - { - switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { - case PLUGIN_VAR_INT: - src= &((sysvar_uint_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_LONG: - src= &((sysvar_ulong_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_LONGLONG: - src= &((sysvar_ulonglong_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_ENUM: - src= &((sysvar_enum_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_SET: - src= &((sysvar_set_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_BOOL: - src= &((sysvar_bool_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_STR: - src= &((sysvar_str_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_DOUBLE: - src= &((sysvar_double_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_uint_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulong_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulonglong_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_enum_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_set_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_bool_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_str_t*) plugin_var)->def_val; - break; - case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_double_t*) plugin_var)->def_val; - break; - default: - DBUG_ASSERT(0); - } - } + src= var_def_ptr(plugin_var); plugin_var->update(thd, plugin_var, tgt, src); return false; @@ -3713,7 +3684,18 @@ 
static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp, *(int*)(opt + 1)= offset= v->offset; if (opt->flags & PLUGIN_VAR_NOCMDOPT) + { + char *val= global_system_variables.dynamic_variables_ptr + offset; + if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) && + (opt->flags & PLUGIN_VAR_MEMALLOC)) + { + char *def_val= *(char**)var_def_ptr(opt); + *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL; + } + else + memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags)); continue; + } optname= (char*) memdup_root(mem_root, v->key + 1, (optnamelen= v->name_len) + 1); @@ -3912,9 +3894,10 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, *str->value= strdup_root(mem_root, *str->value); } + var= find_bookmark(plugin_name.str, o->name, o->flags); if (o->flags & PLUGIN_VAR_NOSYSVAR) continue; - if ((var= find_bookmark(plugin_name.str, o->name, o->flags))) + if (var) v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp); else { From b38d3c3d8afea7183f2a595f0c8d8dd7efaa801f Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Tue, 27 Sep 2016 12:34:15 +0000 Subject: [PATCH 49/96] MDEV-10907 MTR and server writes can interleave in the error log Ensure atomic appends to the error log by using CreateFile with FILE_APPEND_DATA flag to open error log file (both MTR and server) --- mysql-test/lib/My/Platform.pm | 49 ++++++++++++++++++++++++++++++++++- mysql-test/lib/mtr_io.pl | 9 ++++--- mysys/my_fopen.c | 6 ++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/mysql-test/lib/My/Platform.pm b/mysql-test/lib/My/Platform.pm index 1776f1008da..110cf8a20e0 100644 --- a/mysql-test/lib/My/Platform.pm +++ b/mysql-test/lib/My/Platform.pm @@ -24,7 +24,7 @@ use File::Path; use base qw(Exporter); our @EXPORT= qw(IS_CYGWIN IS_WINDOWS IS_WIN32PERL native_path posix_path mixed_path - check_socket_path_length process_alive); + check_socket_path_length process_alive open_for_append); BEGIN { if ($^O 
eq "cygwin") { @@ -161,4 +161,51 @@ sub process_alive { } + +use Symbol qw( gensym ); + +use if $^O eq 'MSWin32', 'Win32API::File', qw( CloseHandle CreateFile GetOsFHandle OsFHandleOpen OPEN_ALWAYS FILE_APPEND_DATA + FILE_SHARE_READ FILE_SHARE_WRITE FILE_SHARE_DELETE ); +use if $^O eq 'MSWin32', 'Win32::API'; + +use constant WIN32API_FILE_NULL => []; + +# Open a file for append +# On Windows we use CreateFile with FILE_APPEND_DATA +# to insure that writes are atomic, not interleaved +# with writes by another processes. +sub open_for_append +{ + my ($file) = @_; + my $fh = gensym(); + + if (IS_WIN32PERL) + { + my $handle; + if (!($handle = CreateFile( + $file, + FILE_APPEND_DATA(), + FILE_SHARE_READ()|FILE_SHARE_WRITE()|FILE_SHARE_DELETE(), + WIN32API_FILE_NULL, + OPEN_ALWAYS(),# Create if doesn't exist. + 0, + WIN32API_FILE_NULL, + ))) + { + return undef; + } + + if (!OsFHandleOpen($fh, $handle, 'wat')) + { + CloseHandle($handle); + return undef; + } + return $fh; + } + + open($fh,">>",$file) or return undef; + return $fh; +} + + 1; diff --git a/mysql-test/lib/mtr_io.pl b/mysql-test/lib/mtr_io.pl index 8c2803f0427..0de4d9612ac 100644 --- a/mysql-test/lib/mtr_io.pl +++ b/mysql-test/lib/mtr_io.pl @@ -21,6 +21,7 @@ use strict; use Carp; +use My::Platform; sub mtr_fromfile ($); sub mtr_tofile ($@); @@ -45,10 +46,10 @@ sub mtr_fromfile ($) { sub mtr_tofile ($@) { my $file= shift; - - open(FILE,">>",$file) or mtr_error("can't open file \"$file\": $!"); - print FILE join("", @_); - close FILE; + my $fh= open_for_append $file; + mtr_error("can't open file \"$file\": $!") unless defined($fh); + print $fh join("", @_); + close $fh; } diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c index 52f61649bb3..cc1019365ac 100644 --- a/mysys/my_fopen.c +++ b/mysys/my_fopen.c @@ -102,6 +102,7 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream) HANDLE osfh; DBUG_ASSERT(path && stream); + DBUG_ASSERT(strchr(mode, 'a')); /* We use FILE_APPEND_DATA below */ /* 
Services don't have stdout/stderr on Windows, so _fileno returns -1. */ if (fd < 0) @@ -112,15 +113,14 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream) fd= _fileno(stream); } - if ((osfh= CreateFile(path, GENERIC_READ | GENERIC_WRITE, + if ((osfh= CreateFile(path, GENERIC_READ | FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) return NULL; - if ((handle_fd= _open_osfhandle((intptr_t)osfh, - _O_APPEND | _O_TEXT)) == -1) + if ((handle_fd= _open_osfhandle((intptr_t)osfh, _O_TEXT)) == -1) { CloseHandle(osfh); return NULL; From 094f140c9ae672eb61a27f15c5e99c7114ed0150 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 27 Sep 2016 17:56:00 +0200 Subject: [PATCH 50/96] 5.6.33 --- storage/innobase/fts/fts0fts.cc | 31 +++++++++++++++++++++ storage/innobase/handler/ha_innodb.cc | 9 ++++-- storage/innobase/handler/handler0alter.cc | 9 ++++-- storage/innobase/handler/i_s.cc | 2 ++ storage/innobase/include/fts0fts.h | 10 +++++++ storage/innobase/row/row0log.cc | 14 +++++----- storage/innobase/row/row0mysql.cc | 34 +++++++++++++++++++++++ 7 files changed, 97 insertions(+), 12 deletions(-) diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 25059db96b0..a0f0fab5566 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. 
*/ UNIV_INTERN char* fts_internal_tbl_name = NULL; +UNIV_INTERN char* fts_internal_tbl_name2 = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -6569,6 +6570,36 @@ fts_check_corrupt_index( return(0); } +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len) +{ + fts_aux_table_t aux_table; + char* parent_table_name = NULL; + + if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { + dict_table_t* parent_table; + + parent_table = dict_table_open_on_id( + aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (parent_table != NULL) { + parent_table_name = mem_strdupl( + parent_table->name, + strlen(parent_table->name)); + + dict_table_close(parent_table, TRUE, FALSE); + } + } + + return(parent_table_name); +} + /** Check the validity of the parent table. 
@param[in] aux_table auxiliary table @return true if it is a valid table or false if it is not */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 95a0e08a806..be5e74e1617 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -14505,7 +14505,12 @@ innodb_internal_table_update( my_free(old); } - fts_internal_tbl_name = *(char**) var_ptr; + fts_internal_tbl_name2 = *(char**) var_ptr; + if (fts_internal_tbl_name2 == NULL) { + fts_internal_tbl_name = const_cast("default"); + } else { + fts_internal_tbl_name = fts_internal_tbl_name2; + } } /****************************************************************//** @@ -16253,7 +16258,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, "Whether to disable OS system file cache for sort I/O", NULL, NULL, FALSE); -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, +static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 961e0818d39..2261754a4f5 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -201,7 +201,10 @@ innobase_need_rebuild( /*==================*/ const Alter_inplace_info* ha_alter_info) { - if (ha_alter_info->handler_flags + Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = + ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); + + if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT @@ -3760,7 +3763,7 @@ err_exit: } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || (ha_alter_info->handler_flags + || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && 
!innobase_need_rebuild(ha_alter_info))) { @@ -3926,7 +3929,7 @@ ok_exit: DBUG_RETURN(false); } - if (ha_alter_info->handler_flags + if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info)) { goto ok_exit; diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 43905f4a32b..a780ee5cd62 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -4038,6 +4038,8 @@ i_s_fts_config_fill( DBUG_RETURN(0); } + DEBUG_SYNC_C("i_s_fts_config_fille_check"); + fields = table->field; /* Prevent DDL to drop fts aux tables. */ diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 87b5787d416..3e2f359bbeb 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -375,6 +375,7 @@ extern bool fts_need_sync; /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; +extern char* fts_internal_tbl_name2; #define fts_que_graph_free(graph) \ do { \ @@ -823,6 +824,15 @@ void fts_drop_orphaned_tables(void); /*==========================*/ +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len); + /******************************************************************//** Since we do a horizontal split on the index table, we need to drop all the split tables. 
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index a6751b208f7..54183759e8d 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -613,7 +613,7 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 4 + old_pk_size; + mrec_size = 6 + old_pk_size; /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, @@ -643,8 +643,8 @@ row_log_table_delete( *b++ = static_cast(old_pk_extra_size); /* Log the size of external prefix we saved */ - mach_write_to_2(b, ext_size); - b += 2; + mach_write_to_4(b, ext_size); + b += 4; rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, @@ -2268,14 +2268,14 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */ - if (mrec + 4 >= mrec_end) { + /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ + if (mrec + 6 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_2(mrec); - mrec += 2; + ext_size = mach_read_from_4(mrec); + mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index c3a7e2c2807..11bef1064d6 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2676,6 +2676,10 @@ loop: return(n_tables + n_tables_dropped); } + DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", + os_thread_sleep(5000000); + ); + table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -2686,6 +2690,16 @@ loop: goto already_dropped; } + if (!table->to_be_dropped) { + /* There is a scenario: the old table is dropped + just after it's added into drop list, and new + table with the same name is created, then we try + to drop the new table in background. 
*/ + dict_table_close(table, FALSE, FALSE); + + goto already_dropped; + } + ut_a(!table->can_be_evicted); dict_table_close(table, FALSE, FALSE); @@ -3945,6 +3959,13 @@ row_drop_table_for_mysql( } } + + DBUG_EXECUTE_IF("row_drop_table_add_to_background", + row_add_table_to_background_drop_list(table->name); + err = DB_SUCCESS; + goto funct_exit; + ); + /* TODO: could we replace the counter n_foreign_key_checks_running with lock checks on the table? Acquire here an exclusive lock on the table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that @@ -4561,6 +4582,19 @@ loop: row_mysql_lock_data_dictionary(trx); while ((table_name = dict_get_first_table_name_in_db(name))) { + /* Drop parent table if it is a fts aux table, to + avoid accessing dropped fts aux tables in information + scheam when parent table still exists. + Note: Drop parent table will drop fts aux tables. */ + char* parent_table_name; + parent_table_name = fts_get_parent_table_name( + table_name, strlen(table_name)); + + if (parent_table_name != NULL) { + mem_free(table_name); + table_name = parent_table_name; + } + ut_a(memcmp(table_name, name, namelen) == 0); table = dict_table_open_on_name( From e3124a8cd79f3639e6159d1fbf87579c3e2b3ebb Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 27 Sep 2016 17:57:28 +0200 Subject: [PATCH 51/96] 5.6.33 --- .../suite/perfschema/r/aggregate.result | 118 ----------- mysql-test/suite/perfschema/t/aggregate.test | 191 ------------------ 2 files changed, 309 deletions(-) delete mode 100644 mysql-test/suite/perfschema/r/aggregate.result delete mode 100644 mysql-test/suite/perfschema/t/aggregate.test diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result deleted file mode 100644 index ab927f544cf..00000000000 --- a/mysql-test/suite/perfschema/r/aggregate.result +++ /dev/null @@ -1,118 +0,0 @@ -"General cleanup" -drop table if exists t1; -update performance_schema.setup_instruments set enabled = 
'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -create table t1 ( -id INT PRIMARY KEY, -b CHAR(100) DEFAULT 'initial value') -ENGINE=MyISAM; -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); -update performance_schema.setup_instruments SET enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -set @dump_all=FALSE; -"Verifying file aggregate consistency" -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, 
e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -"Verifying waits aggregate consistency (instance)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN 
performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT) -"Verifying waits aggregate consistency (thread)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT) -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -drop table test.t1; diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test deleted file mode 100644 index 326c0e763d9..00000000000 --- a/mysql-test/suite/perfschema/t/aggregate.test +++ /dev/null @@ -1,191 +0,0 @@ -# Tests for PERFORMANCE_SCHEMA -# Verify that statistics aggregated by different criteria are consistent. 
- ---source include/not_embedded.inc ---source include/have_perfschema.inc ---source include/have_QC_Disabled.inc - ---echo "General cleanup" - ---disable_warnings -drop table if exists t1; ---enable_warnings - -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; - -# Cleanup statistics -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; - -# Start recording data -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - - -create table t1 ( - id INT PRIMARY KEY, - b CHAR(100) DEFAULT 'initial value') - ENGINE=MyISAM; - -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); - -# Stop recording data, so the select below don't add noise. -update performance_schema.setup_instruments SET enabled = 'NO'; -# Disable all consumers, for long standing waits -update performance_schema.setup_consumers set enabled = 'NO'; - -# Helper to debug -set @dump_all=FALSE; - -# Note that in general: -# - COUNT/SUM/MAX(file_summary_by_event_name) >= -# COUNT/SUM/MAX(file_summary_by_instance). -# - MIN(file_summary_by_event_name) <= -# MIN(file_summary_by_instance). -# There will be equality only when file instances are not removed, -# aka when a file is not deleted from the file system, -# because doing so removes a row in file_summary_by_instance. 
- -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_instance) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_instance) -# There will be equality only when an instrument instance -# is not removed, which is next to impossible to predictably guarantee -# in the server. -# For example, a MyISAM table removed from the table cache -# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock. -# Another example, a thread terminating will cause a mysql_mutex_destroy -# on sql/LOCK_delete -# Both cause a row to be deleted from events_waits_summary_by_instance. - -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_thread_by_event_name) -# There will be equality only when no thread is removed, -# that is if no thread disconnects, or no sub thread (for example insert -# delayed) ever completes. -# A thread completing will cause rows in -# events_waits_summary_by_thread_by_event_name to be removed. - ---echo "Verifying file aggregate consistency" - -# Since the code generating the load in this test does: -# - create table -# - insert -# - does not cause temporary tables to be used -# we can test for equality here for file aggregates. - -# If any of these queries returns data, the test failed. 
- -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (instance)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) 
-GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (thread)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; - - -# Cleanup - -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - -drop table test.t1; From 
e312e2e636b84ba4d0d64cc5a7bb368d3286c5ed Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 27 Sep 2016 17:59:58 +0200 Subject: [PATCH 52/96] 5.6.32-78.1 --- storage/tokudb/CMakeLists.txt | 2 +- .../tokudb/PerconaFT/buildheader/make_tdb.cc | 4 +- .../cmake_modules/TokuSetupCompiler.cmake | 1 + storage/tokudb/PerconaFT/ft/CMakeLists.txt | 2 +- storage/tokudb/PerconaFT/ft/ft-flusher.cc | 4 +- storage/tokudb/PerconaFT/ft/ft-ops.cc | 215 +++-- .../tokudb/PerconaFT/ft/ft-recount-rows.cc | 29 +- storage/tokudb/PerconaFT/ft/ft.cc | 3 + .../PerconaFT/ft/loader/loader-internal.h | 2 +- storage/tokudb/PerconaFT/ft/loader/loader.cc | 2 +- storage/tokudb/PerconaFT/ft/node.cc | 125 +-- storage/tokudb/PerconaFT/ft/node.h | 1 + .../PerconaFT/ft/serialize/block_allocator.cc | 475 +++------- .../PerconaFT/ft/serialize/block_allocator.h | 160 ++-- .../ft/serialize/block_allocator_strategy.cc | 224 ----- .../PerconaFT/ft/serialize/block_table.cc | 632 ++++++++----- .../PerconaFT/ft/serialize/block_table.h | 141 ++- .../tokudb/PerconaFT/ft/serialize/compress.cc | 2 +- .../PerconaFT/ft/serialize/ft-serialize.cc | 344 ++++---- .../ft/serialize/ft_node-serialize.cc | 69 +- .../PerconaFT/ft/serialize/rbtree_mhs.cc | 833 ++++++++++++++++++ .../PerconaFT/ft/serialize/rbtree_mhs.h | 351 ++++++++ .../ft/tests/block_allocator_strategy_test.cc | 126 --- .../ft/tests/block_allocator_test.cc | 362 ++++---- .../PerconaFT/ft/tests/cachetable-5978.cc | 2 +- .../ft/tests/cachetable-simple-clone2.cc | 2 +- .../tokudb/PerconaFT/ft/tests/ft-bfe-query.cc | 401 +++++---- .../PerconaFT/ft/tests/ft-clock-test.cc | 279 +++--- .../ft/tests/ft-serialize-benchmark.cc | 231 +++-- .../PerconaFT/ft/tests/ft-serialize-test.cc | 829 ++++++++++------- storage/tokudb/PerconaFT/ft/tests/ft-test.cc | 11 +- .../tokudb/PerconaFT/ft/tests/pqueue-test.cc | 4 +- .../ft/tests/test-leafentry-nested.cc | 2 +- .../tests/test-oldest-referenced-xid-flush.cc | 6 +- .../test-rbtree-insert-remove-with-mhs.cc} | 72 +- 
.../test-rbtree-insert-remove-without-mhs.cc | 102 +++ storage/tokudb/PerconaFT/ft/txn/roll.cc | 2 +- .../tokudb/PerconaFT/ft/txn/rollback-apply.cc | 2 +- .../PerconaFT/ft/txn/rollback-ct-callbacks.cc | 23 +- storage/tokudb/PerconaFT/ft/ule.cc | 4 +- .../portability/tests/test-max-data.cc | 2 +- .../PerconaFT/portability/toku_config.h.in | 1 - .../tokudb/PerconaFT/portability/toku_time.h | 8 + .../tokudb/PerconaFT/src/indexer-internal.h | 2 +- .../tokudb/PerconaFT/src/indexer-undo-do.cc | 4 +- .../hotindexer-undo-do-tests/commit.i0.test | 2 +- .../PerconaFT/src/tests/loader-dup-test.cc | 2 +- .../src/tests/recovery_fileops_unit.cc | 2 +- .../src/tests/stat64-root-changes.cc | 4 +- .../src/tests/test_insert_many_gc.cc | 2 +- .../PerconaFT/src/tests/test_stress0.cc | 2 +- .../PerconaFT/src/tests/test_txn_abort5a.cc | 3 +- storage/tokudb/PerconaFT/src/ydb-internal.h | 2 +- .../xz-4.999.9beta/build-aux/config.guess | 501 +++++------ storage/tokudb/PerconaFT/tools/CMakeLists.txt | 3 +- storage/tokudb/PerconaFT/tools/ba_replay.cc | 629 ------------- storage/tokudb/PerconaFT/tools/ftverify.cc | 2 +- storage/tokudb/PerconaFT/tools/tokuftdump.cc | 1 + .../tokudb/PerconaFT/util/tests/x1764-test.cc | 2 +- storage/tokudb/ha_tokudb.cc | 26 +- storage/tokudb/ha_tokudb_admin.cc | 278 +++--- storage/tokudb/hatoku_defines.h | 7 +- .../tokudb/r/background_job_manager.result | 2 +- .../mysql-test/tokudb_bugs/t/frm_store.test | 26 +- .../mysql-test/tokudb_bugs/t/frm_store2.test | 26 +- .../mysql-test/tokudb_bugs/t/frm_store3.test | 26 +- .../t/tokudb_drop_part_table_668.test | 41 +- .../t/tokudb_drop_simple_table_668.test | 41 +- .../r/rpl_foreign_key_tokudb.result | 58 -- .../tokudb_rpl/t/rpl_foreign_key_tokudb.test | 4 - storage/tokudb/tokudb_background.cc | 27 +- storage/tokudb/tokudb_background.h | 49 +- storage/tokudb/tokudb_information_schema.cc | 47 +- 73 files changed, 4195 insertions(+), 3718 deletions(-) delete mode 100644 
storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc create mode 100644 storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc create mode 100644 storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h delete mode 100644 storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc rename storage/tokudb/PerconaFT/ft/{serialize/block_allocator_strategy.h => tests/test-rbtree-insert-remove-with-mhs.cc} (55%) create mode 100644 storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc delete mode 100644 storage/tokudb/PerconaFT/tools/ba_replay.cc delete mode 100644 storage/tokudb/mysql-test/tokudb_rpl/r/rpl_foreign_key_tokudb.result delete mode 100644 storage/tokudb/mysql-test/tokudb_rpl/t/rpl_foreign_key_tokudb.test diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 4ec539f7d0b..fbb02582f4d 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(TOKUDB_VERSION 5.6.31-77.0) +SET(TOKUDB_VERSION 5.6.32-78.1) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT CMAKE_VERSION VERSION_LESS "2.8.9") diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 4b62703480f..576f902f6ae 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -367,8 +367,8 @@ static void print_db_env_struct (void) { "int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */", "int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */", "int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */", - "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. 
*/", - "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */", + "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */", + "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */", "int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */", "int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index 5f2c9ef2c2a..ea5c7e22209 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -101,6 +101,7 @@ set_cflags_if_supported( -Wno-pointer-bool-conversion -fno-rtti -fno-exceptions + -Wno-error=nonnull-compare ) ## set_cflags_if_supported_named("-Weffc++" -Weffcpp) diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt index 11091073ac2..6696c26ecc0 100644 --- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt @@ -55,8 +55,8 @@ set(FT_SOURCES msg_buffer node pivotkeys + serialize/rbtree_mhs serialize/block_allocator - serialize/block_allocator_strategy serialize/block_table serialize/compress serialize/ft_node-serialize diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc index fb456ea6a18..e6452f60cfc 100644 --- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc +++ 
b/storage/tokudb/PerconaFT/ft/ft-flusher.cc @@ -496,7 +496,7 @@ handle_split_of_child( // We never set the rightmost blocknum to be the root. // Instead, we wait for the root to split and let promotion initialize the rightmost - // blocknum to be the first non-root leaf node on the right extreme to recieve an insert. + // blocknum to be the first non-root leaf node on the right extreme to receive an insert. BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum); invariant(ft->h->root_blocknum.b != rightmost_blocknum.b); if (childa->blocknum.b == rightmost_blocknum.b) { @@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // It is possible after reading in the entire child, // that we now know that the child is not reactive // if so, we can unpin parent right now - // we wont be splitting/merging child + // we won't be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 8f61bc67339..f131668889e 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -598,15 +598,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) { } } -void toku_ftnode_clone_callback( - void* value_data, - void** cloned_value_data, - long* clone_size, - PAIR_ATTR* new_attr, - bool for_checkpoint, - void* write_extraargs - ) -{ +void toku_ftnode_clone_callback(void *value_data, + void **cloned_value_data, + long *clone_size, + PAIR_ATTR *new_attr, + bool for_checkpoint, + void *write_extraargs) { FTNODE node = static_cast(value_data); toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast(write_extraargs); @@ -618,13 +615,16 @@ void toku_ftnode_clone_callback( toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } - cloned_node->oldest_referenced_xid_known = 
node->oldest_referenced_xid_known; - cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; + cloned_node->oldest_referenced_xid_known = + node->oldest_referenced_xid_known; + cloned_node->max_msn_applied_to_node_on_disk = + node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; - cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; + cloned_node->layout_version_read_from_disk = + node->layout_version_read_from_disk; cloned_node->build_id = node->build_id; cloned_node->height = node->height; cloned_node->dirty = node->dirty; @@ -649,38 +649,39 @@ void toku_ftnode_clone_callback( // set new pair attr if necessary if (node->height == 0) { *new_attr = make_ftnode_pair_attr(node); - } - else { + for (int i = 0; i < node->n_children; i++) { + BLB(node, i)->logical_rows_delta = 0; + BLB(cloned_node, i)->logical_rows_delta = 0; + } + } else { new_attr->is_valid = false; } *clone_size = ftnode_memory_size(cloned_node); *cloned_value_data = cloned_node; } -void toku_ftnode_flush_callback( - CACHEFILE UU(cachefile), - int fd, - BLOCKNUM blocknum, - void *ftnode_v, - void** disk_data, - void *extraargs, - PAIR_ATTR size __attribute__((unused)), - PAIR_ATTR* new_size, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - FT ft = (FT) extraargs; - FTNODE ftnode = (FTNODE) ftnode_v; - FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; +void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), + int fd, + BLOCKNUM blocknum, + void *ftnode_v, + void **disk_data, + void *extraargs, + PAIR_ATTR size __attribute__((unused)), + PAIR_ATTR *new_size, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { + FT ft = (FT)extraargs; + FTNODE ftnode = (FTNODE)ftnode_v; + FTNODE_DISK_DATA *ndd = 
(FTNODE_DISK_DATA *)disk_data; assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { - // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() + // cloned nodes already had their stale messages moved, see + // toku_ftnode_clone_callback() toku_move_ftnode_messages_to_stale(ft, ftnode); } else if (height == 0) { toku_ftnode_leaf_run_gc(ft, ftnode); @@ -688,7 +689,8 @@ void toku_ftnode_flush_callback( toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); + int r = toku_serialize_ftnode_to( + fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -703,20 +705,22 @@ void toku_ftnode_flush_callback( FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size); } toku_free(*disk_data); - } - else { + } else { if (ftnode->height == 0) { for (int i = 0; i < ftnode->n_children; i++) { - if (BP_STATE(ftnode,i) == PT_AVAIL) { + if (BP_STATE(ftnode, i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + if (!ftnode->dirty) + toku_ft_adjust_logical_row_count( + ft, -bn->logical_rows_delta); } } } } toku_ftnode_free(&ftnode); - } - else { + } else { *new_size = make_ftnode_pair_attr(ftnode); } } @@ -845,10 +849,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr } // callback for partially evicting a node -int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, - void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) { - FTNODE node = (FTNODE) ftnode_pv; - FT ft = (FT) write_extraargs; +int toku_ftnode_pe_callback(void *ftnode_pv, 
+ PAIR_ATTR old_attr, + void *write_extraargs, + void (*finalize)(PAIR_ATTR new_attr, void *extra), + void *finalize_extra) { + FTNODE node = (FTNODE)ftnode_pv; + FT ft = (FT)write_extraargs; int num_partial_evictions = 0; // Hold things we intend to destroy here. @@ -866,7 +873,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext } // Don't partially evict nodes whose partitions can't be read back // from disk individually - if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { + if (node->layout_version_read_from_disk < + FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { goto exit; } // @@ -874,77 +882,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext // if (node->height > 0) { for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { NONLEAF_CHILDINFO bnc = BNC(node, i); if (ft_compress_buffers_before_eviction && - // We may not serialize and compress a partition in memory if its - // in memory layout version is different than what's on disk (and - // therefore requires upgrade). + // We may not serialize and compress a partition in + // memory if its in memory layout version is different + // than what's on disk (and therefore requires upgrade). // - // Auto-upgrade code assumes that if a node's layout version read - // from disk is not current, it MUST require upgrade. Breaking - // this rule would cause upgrade code to upgrade this partition - // again after we serialize it as the current version, which is bad. - node->layout_version == node->layout_version_read_from_disk) { + // Auto-upgrade code assumes that if a node's layout + // version read from disk is not current, it MUST + // require upgrade. 
+ // Breaking this rule would cause upgrade code to + // upgrade this partition again after we serialize it as + // the current version, which is bad. + node->layout_version == + node->layout_version_read_from_disk) { toku_ft_bnc_move_messages_to_stale(ft, bnc); compress_internal_node_partition( node, i, // Always compress with quicklz - TOKU_QUICKLZ_METHOD - ); + TOKU_QUICKLZ_METHOD); } else { // We're not compressing buffers before eviction. Simply - // detach the buffer and set the child's state to on-disk. + // detach the buffer and set the child's state to + // on-disk. set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; } buffers_to_destroy[num_buffers_to_destroy++] = bnc; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else { + } else { continue; } } - } - // - // partial eviction strategy for basement nodes: - // if the bn is compressed, evict it - // else: check if it requires eviction, if it does, evict it, if not, sweep the clock count - // - else { + } else { + // + // partial eviction strategy for basement nodes: + // if the bn is compressed, evict it + // else: check if it requires eviction, if it does, evict it, if not, + // sweep the clock count + // for (int i = 0; i < node->n_children; i++) { // Get rid of compressed stuff no matter what. 
- if (BP_STATE(node,i) == PT_COMPRESSED) { + if (BP_STATE(node, i) == PT_COMPRESSED) { SUB_BLOCK sb = BSB(node, i); pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr; pointers_to_free[num_pointers_to_free++] = sb; set_BNULL(node, i); - BP_STATE(node,i) = PT_ON_DISK; + BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; - } - else if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + } else if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { BASEMENTNODE bn = BLB(node, i); basements_to_destroy[num_basements_to_destroy++] = bn; - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + toku_ft_adjust_logical_row_count(ft, + -bn->logical_rows_delta); set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else if (BP_STATE(node,i) == PT_ON_DISK) { + } else if (BP_STATE(node, i) == PT_ON_DISK) { continue; - } - else { + } else { abort(); } } @@ -2378,12 +2386,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); } -void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging) { +void toku_ft_maybe_update(FT_HANDLE ft_h, + const DBT *key, + const DBT *update_function_extra, + TOKUTXN txn, + bool oplsn_valid, + LSN oplsn, + bool do_logging) { TXNID_PAIR xid = toku_txn_get_txnid(txn); if (txn) { - BYTESTRING keybs = { key->size, (char *) key->data }; + BYTESTRING keybs = {key->size, (char *)key->data}; toku_logger_save_rollback_cmdupdate( txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); toku_txn_maybe_note_ft(txn, ft_h->ft); @@ -2392,22 +2404,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func TOKULOGGER logger; logger = toku_txn_logger(txn); if 
(do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_update(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, keybs, extrabs); + BYTESTRING keybs = {.len = key->size, .data = (char *)key->data}; + BYTESTRING extrabs = {.len = update_function_extra->size, + .data = (char *)update_function_extra->data}; + toku_log_enq_update(logger, + NULL, + 0, + txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, + keybs, + extrabs); } LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + if (oplsn_valid && + oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); - ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + XIDS message_xids = + txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg( + key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); ft_send_update_msg(ft_h, msg, txn); } + // updates get converted to insert messages, which should do a -1 on the + // logical row count when the messages are permanently applied + toku_ft_adjust_logical_row_count(ft_h->ft, 1); } void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc index adac96f4882..e31d80772d5 100644 --- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc +++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc @@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) { return rre->_cancelled = rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra); } -int toku_ft_recount_rows( - FT_HANDLE ft, - int (*progress_callback)( - uint64_t count, - uint64_t deleted, 
- void* progress_extra), - void* progress_extra) { - +int toku_ft_recount_rows(FT_HANDLE ft, + int (*progress_callback)(uint64_t count, + uint64_t deleted, + void* progress_extra), + void* progress_extra) { int ret = 0; - recount_rows_extra_t rre = { - progress_callback, - progress_extra, - 0, - false - }; + recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false}; ft_cursor c; ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false); - if (ret) return ret; + if (ret) + return ret; - toku_ft_cursor_set_check_interrupt_cb( - &c, - recount_rows_interrupt, - &rre); + toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre); ret = toku_ft_cursor_first(&c, recount_rows_found, &rre); while (FT_LIKELY(ret == 0)) { @@ -108,6 +98,7 @@ int toku_ft_recount_rows( if (rre._cancelled == false) { // update ft count toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys); + ft->ft->h->dirty = 1; ret = 0; } diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 93d21233bf7..699fcc57603 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) { // must be returned in toku_ft_stat64. if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) { toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta); + if (ft->in_memory_logical_rows == (uint64_t)-1) { + toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1); + } } } diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h index dd070373e26..1aa2c203831 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h +++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h @@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error); -// For test purposes only. 
(In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.) +// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.) uint64_t toku_ft_loader_get_rowset_budget_for_testing (void); int toku_ft_loader_finish_extractor(FTLOADER bl); diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc index 20f9363da1e..528c86a8f79 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader.cc +++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc @@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) { uint64_t toku_ft_loader_get_rowset_budget_for_testing (void) -// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613). +// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613). { return 16ULL*size_factor*1024ULL; } diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc index 58ba675eb7c..12e5fda226e 100644 --- a/storage/tokudb/PerconaFT/ft/node.cc +++ b/storage/tokudb/PerconaFT/ft/node.cc @@ -373,52 +373,48 @@ find_bounds_within_message_tree( } } -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). - */ +// For each message in the ancestor's buffer (determined by childnum) that +// is key-wise between lower_bound_exclusive and upper_bound_inclusive, +// apply the message to the basement node. We treat the bounds as minus +// or plus infinity respectively if they are NULL. 
Do not mark the node +// as dirty (preserve previous state of 'dirty' bit). static void bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - const pivot_bounds &bounds, // contains pivot key bounds of this basement node - txn_gc_info* gc_info, - bool* msgs_applied) { - + int childnum, // which child buffer of ancestor contains messages we want + const pivot_bounds & + bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool *msgs_applied) { int r; NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); // Determine the offsets in the message trees between which we need to // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; + STAT64INFO_S stats_delta = {0, 0}; uint64_t workdone_this_ancestor = 0; int64_t logical_rows_delta = 0; uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree( - t->ft->cmp, - bnc->stale_message_tree, - &bnc->msg_buffer, - bounds, - &stale_lbi, - &stale_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->stale_message_tree, + &bnc->msg_buffer, + bounds, + &stale_lbi, + &stale_ube); } else { stale_lbi = 0; stale_ube = 0; } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree( - t->ft->cmp, - bnc->fresh_message_tree, - &bnc->msg_buffer, - bounds, - &fresh_lbi, - &fresh_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->fresh_message_tree, + &bnc->msg_buffer, + bounds, + &fresh_lbi, + &fresh_ube); // We now know where all the messages we must apply are, so one of the // following 4 cases will do the application, depending on which of @@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node( // 
We have messages in multiple trees, so we grab all // the relevant messages' offsets and sort them by MSN, then apply // them in MSN order. - const int buffer_size = ((stale_ube - stale_lbi) + - (fresh_ube - fresh_lbi) + - bnc->broadcast_list.size()); + const int buffer_size = + ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + + bnc->broadcast_list.size()); toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets, + .i = 0}; // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + r = bnc->stale_message_tree + .iterate_on_range( + stale_lbi, stale_ube, &sfo_extra); assert_zero(r); // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + r = bnc->fresh_message_tree + .iterate_and_mark_range( + fresh_lbi, fresh_ube, &sfo_extra); assert_zero(r); // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); + r = bnc->broadcast_list.iterate(&sfo_extra); assert_zero(r); invariant(sfo_extra.i == buffer_size); // Sort by MSN. - toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + toku::sort:: + mergesort_r(offsets, buffer_size, bnc->msg_buffer); // Apply the messages in MSN order. 
for (int i = 0; i < buffer_size; ++i) { *msgs_applied = true; - do_bn_apply_msg( - t, - bn, - &bnc->msg_buffer, - offsets[i], - gc_info, - &workdone_this_ancestor, - &stats_delta, - &logical_rows_delta); + do_bn_apply_msg(t, + bn, + &bnc->msg_buffer, + offsets[i], + gc_info, + &workdone_this_ancestor, + &stats_delta, + &logical_rows_delta); } } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + // No stale messages to apply, we just apply fresh messages, and mark + // them to be moved to stale later. struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); + .logical_rows_delta = &logical_rows_delta}; + if (fresh_ube - fresh_lbi > 0) + *msgs_applied = true; + r = bnc->fresh_message_tree + .iterate_and_mark_range( + fresh_lbi, fresh_ube, &iter_extra); assert_zero(r); } else { invariant(fresh_lbi == fresh_ube); // No fresh messages to apply, we just apply stale messages. 
- if (stale_ube - stale_lbi > 0) *msgs_applied = true; + if (stale_ube - stale_lbi > 0) + *msgs_applied = true; struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; + .logical_rows_delta = &logical_rows_delta}; - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); + r = bnc->stale_message_tree + .iterate_on_range( + stale_lbi, stale_ube, &iter_extra); assert_zero(r); } // // update stats // if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), + workdone_this_ancestor); } if (stats_delta.numbytes || stats_delta.numrows) { toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); } toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta); + bn->logical_rows_delta += logical_rows_delta; } static void diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h index ad0298e81c5..52eefec0936 100644 --- a/storage/tokudb/PerconaFT/ft/node.h +++ b/storage/tokudb/PerconaFT/ft/node.h @@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node { MSN max_msn_applied; // max message sequence number applied bool stale_ancestor_messages_applied; STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk + int64_t logical_rows_delta; }; typedef struct ftnode_leaf_basement_node *BASEMENTNODE; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc index 1355f3739ee..19811373d16 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc @@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona 
and/or its affiliates. All rights reserved. #include "portability/toku_stdlib.h" #include "ft/serialize/block_allocator.h" -#include "ft/serialize/block_allocator_strategy.h" +#include "ft/serialize/rbtree_mhs.h" #if TOKU_DEBUG_PARANOID -#define VALIDATE() validate() +#define VALIDATE() Validate() #else #define VALIDATE() #endif -static FILE *ba_trace_file = nullptr; - -void block_allocator::maybe_initialize_trace(void) { - const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); - if (ba_trace_path != nullptr) { - ba_trace_file = toku_os_fopen(ba_trace_path, "w"); - if (ba_trace_file == nullptr) { - fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " - "but it could not be opened for writing (errno %d)\n", - ba_trace_path, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); - } - } -} - -void block_allocator::maybe_close_trace() { - if (ba_trace_file != nullptr) { - int r = toku_os_fclose(ba_trace_file); - if (r != 0) { - fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", - r, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); - } - } -} - -void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { - // the alignment must be at least 512 and aligned with 512 to work with direct I/O - assert(alignment >= 512 && (alignment % 512) == 0); +void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning, + uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with + // direct I/O + invariant(alignment >= 512 && (alignment % 512) == 0); _reserve_at_beginning = reserve_at_beginning; _alignment = alignment; _n_blocks = 0; - _blocks_array_size = 1; - XMALLOC_N(_blocks_array_size, _blocks_array); _n_bytes_in_use = reserve_at_beginning; - _strategy = 
BA_STRATEGY_FIRST_FIT; - - memset(&_trace_lock, 0, sizeof(toku_mutex_t)); - toku_mutex_init(&_trace_lock, nullptr); + _tree = new MhsRbTree::Tree(alignment); +} +void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) { + CreateInternal(reserve_at_beginning, alignment); + _tree->Insert({reserve_at_beginning, MAX_BYTE}); VALIDATE(); } -void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { - _create_internal(reserve_at_beginning, alignment); - _trace_create(); +void BlockAllocator::Destroy() { + delete _tree; } -void block_allocator::destroy() { - toku_free(_blocks_array); - _trace_destroy(); - toku_mutex_destroy(&_trace_lock); -} - -void block_allocator::set_strategy(enum allocation_strategy strategy) { - _strategy = strategy; -} - -void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { - if (_n_blocks + n_to_add > _blocks_array_size) { - uint64_t new_size = _n_blocks + n_to_add; - uint64_t at_least = _blocks_array_size * 2; - if (at_least > new_size) { - new_size = at_least; - } - _blocks_array_size = new_size; - XREALLOC_N(_blocks_array_size, _blocks_array); - } -} - -void block_allocator::grow_blocks_array() { - grow_blocks_array_by(1); -} - -void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks) { - _create_internal(reserve_at_beginning, alignment); - +void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *translation_pairs, + uint64_t n_blocks) { + CreateInternal(reserve_at_beginning, alignment); _n_blocks = n_blocks; - grow_blocks_array_by(_n_blocks); - memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); - std::sort(_blocks_array, _blocks_array + _n_blocks); - for (uint64_t i = 0; i < _n_blocks; i++) { - // Allocator does not support size 0 blocks. See block_allocator_free_block. 
- invariant(_blocks_array[i].size > 0); - invariant(_blocks_array[i].offset >= _reserve_at_beginning); - invariant(_blocks_array[i].offset % _alignment == 0); - _n_bytes_in_use += _blocks_array[i].size; + struct BlockPair *XMALLOC_N(n_blocks, pairs); + memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair)); + std::sort(pairs, pairs + n_blocks); + + if (pairs[0]._offset > reserve_at_beginning) { + _tree->Insert( + {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning}); } + for (uint64_t i = 0; i < _n_blocks; i++) { + // Allocator does not support size 0 blocks. See + // block_allocator_free_block. + invariant(pairs[i]._size > 0); + invariant(pairs[i]._offset >= _reserve_at_beginning); + invariant(pairs[i]._offset % _alignment == 0); + _n_bytes_in_use += pairs[i]._size; + + MhsRbTree::OUUInt64 free_size(MAX_BYTE); + MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size); + if (i < n_blocks - 1) { + MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset); + invariant(next_offset >= free_offset); + free_size = next_offset - free_offset; + if (free_size == 0) + continue; + } + _tree->Insert({free_offset, free_size}); + } + toku_free(pairs); VALIDATE(); - - _trace_create_from_blockpairs(); } // Effect: align a value by rounding up. 
-static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { +static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -struct block_allocator::blockpair * -block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { - switch (_strategy) { - case BA_STRATEGY_FIRST_FIT: - return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_BEST_FIT: - return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_HEAT_ZONE: - return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); - case BA_STRATEGY_PADDED_FIT: - return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); - default: - abort(); - } -} - -// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { - struct blockpair *bp; - +// Effect: Allocate a block. The resulting block must be aligned on the +// ba->alignment (which to make direct_io happy must be a positive multiple of +// 512). +void BlockAllocator::AllocBlock(uint64_t size, + uint64_t *offset) { // Allocator does not support size 0 blocks. See block_allocator_free_block. 
invariant(size > 0); - grow_blocks_array(); _n_bytes_in_use += size; + *offset = _tree->Remove(size); - uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); - - if (_n_blocks == 0) { - // First and only block - assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use - _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); - _blocks_array[0].size = size; - *offset = _blocks_array[0].offset; - goto done; - } else if (end_of_reserve + size <= _blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - bp = &_blocks_array[0]; - memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - *offset = end_of_reserve; - goto done; - } - - bp = choose_block_to_alloc_after(size, heat); - if (bp != nullptr) { - // our allocation strategy chose the space after `bp' to fit the new block - uint64_t answer_offset = align(bp->offset + bp->size, _alignment); - uint64_t blocknum = bp - _blocks_array; - invariant(&_blocks_array[blocknum] == bp); - invariant(blocknum < _n_blocks); - memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; - *offset = answer_offset; - } else { - // It didn't fit anywhere, so fit it on the end. - assert(_n_blocks < _blocks_array_size); - bp = &_blocks_array[_n_blocks]; - uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); - bp->offset = answer_offset; - bp->size = size; - *offset = answer_offset; - } - -done: _n_blocks++; VALIDATE(); - - _trace_alloc(size, heat, *offset); } -// Find the index in the blocks array that has a particular offset. Requires that the block exist. -// Use binary search so it runs fast. 
-int64_t block_allocator::find_block(uint64_t offset) { - VALIDATE(); - if (_n_blocks == 1) { - assert(_blocks_array[0].offset == offset); - return 0; - } - - uint64_t lo = 0; - uint64_t hi = _n_blocks; - while (1) { - assert(lo < hi); // otherwise no such block exists. - uint64_t mid = (lo + hi) / 2; - uint64_t thisoff = _blocks_array[mid].offset; - if (thisoff < offset) { - lo = mid + 1; - } else if (thisoff > offset) { - hi = mid; - } else { - return mid; - } - } -} - -// To support 0-sized blocks, we need to include size as an input to this function. +// To support 0-sized blocks, we need to include size as an input to this +// function. // All 0-sized blocks at the same offset can be considered identical, but // a 0-sized block can share offset with a non-zero sized block. -// The non-zero sized block is not exchangable with a zero sized block (or vice versa), -// so inserting 0-sized blocks can cause corruption here. -void block_allocator::free_block(uint64_t offset) { +// The non-zero sized block is not exchangable with a zero sized block (or vice +// versa), so inserting 0-sized blocks can cause corruption here. +void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) { VALIDATE(); - int64_t bn = find_block(offset); - assert(bn >= 0); // we require that there is a block with that offset. - _n_bytes_in_use -= _blocks_array[bn].size; - memmove(&_blocks_array[bn], &_blocks_array[bn + 1], - (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_bytes_in_use -= size; + _tree->Insert({offset, size}); _n_blocks--; VALIDATE(); - - _trace_free(offset); } -uint64_t block_allocator::block_size(uint64_t offset) { - int64_t bn = find_block(offset); - assert(bn >=0); // we require that there is a block with that offset. 
- return _blocks_array[bn].size; +uint64_t BlockAllocator::AllocatedLimit() const { + MhsRbTree::Node *max_node = _tree->MaxNode(); + return rbn_offset(max_node).ToInt(); } -uint64_t block_allocator::allocated_limit() const { - if (_n_blocks == 0) { - return _reserve_at_beginning; - } else { - struct blockpair *last = &_blocks_array[_n_blocks - 1]; - return last->offset + last->size; - } -} - -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. +// Effect: Consider the blocks in sorted order. The reserved block at the +// beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. -int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { - if (b ==0 ) { +int BlockAllocator::NthBlockInLayoutOrder(uint64_t b, + uint64_t *offset, + uint64_t *size) { + MhsRbTree::Node *x, *y; + if (b == 0) { *offset = 0; *size = _reserve_at_beginning; - return 0; + return 0; } else if (b > _n_blocks) { return -1; } else { - *offset =_blocks_array[b - 1].offset; - *size =_blocks_array[b - 1].size; + x = _tree->MinNode(); + for (uint64_t i = 1; i <= b; i++) { + y = x; + x = _tree->Successor(x); + } + *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt(); + *offset = (rbn_offset(y) + rbn_size(y)).ToInt(); return 0; } } +struct VisUnusedExtra { + TOKU_DB_FRAGMENTATION _report; + uint64_t _align; +}; + +static void VisUnusedCollector(void *extra, + MhsRbTree::Node *node, + uint64_t UU(depth)) { + struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra; + TOKU_DB_FRAGMENTATION report = v_e->_report; + uint64_t alignm = v_e->_align; + + MhsRbTree::OUUInt64 offset = rbn_offset(node); + MhsRbTree::OUUInt64 size = rbn_size(node); + MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm)); + uint64_t free_space = 
(offset + size - answer_offset).ToInt(); + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } +} // Requires: report->file_size_bytes is filled in // Requires: report->data_bytes is filled in // Requires: report->checkpoint_bytes_additional is filled in -void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { - assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); +void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) { + invariant(_n_bytes_in_use == + report->data_bytes + report->checkpoint_bytes_additional); report->unused_bytes = 0; report->unused_blocks = 0; report->largest_unused_block = 0; - if (_n_blocks > 0) { - //Deal with space before block 0 and after reserve: - { - struct blockpair *bp = &_blocks_array[0]; - assert(bp->offset >= align(_reserve_at_beginning, _alignment)); - uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { - // Consider the space after blocknum - struct blockpair *bp = &_blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - uint64_t next_offset = bp[1].offset; - uint64_t free_space = next_offset - end_of_this_block; - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space after last block - { - struct blockpair *bp = 
&_blocks_array[_n_blocks-1]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - } else { - // No blocks. Just the reserve. - uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } + struct VisUnusedExtra extra = {report, _alignment}; + _tree->InOrderVisitor(VisUnusedCollector, &extra); } -void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { - report->data_bytes = _n_bytes_in_use; - report->data_blocks = _n_blocks; +void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; report->file_size_bytes = 0; report->checkpoint_bytes_additional = 0; - get_unused_statistics(report); + UnusedStatistics(report); } -void block_allocator::validate() const { - uint64_t n_bytes_in_use = _reserve_at_beginning; - for (uint64_t i = 0; i < _n_blocks; i++) { - n_bytes_in_use += _blocks_array[i].size; - if (i > 0) { - assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); - assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); - } +struct ValidateExtra { + uint64_t _bytes; + MhsRbTree::Node *_pre_node; +}; +static void VisUsedBlocksInOrder(void *extra, + MhsRbTree::Node *cur_node, + uint64_t UU(depth)) { 
+ struct ValidateExtra *v_e = (struct ValidateExtra *)extra; + MhsRbTree::Node *pre_node = v_e->_pre_node; + // verify no overlaps + if (pre_node) { + invariant(rbn_size(pre_node) > 0); + invariant(rbn_offset(cur_node) > + rbn_offset(pre_node) + rbn_size(pre_node)); + MhsRbTree::OUUInt64 used_space = + rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node)); + v_e->_bytes += used_space.ToInt(); + } else { + v_e->_bytes += rbn_offset(cur_node).ToInt(); } - assert(n_bytes_in_use == _n_bytes_in_use); + v_e->_pre_node = cur_node; } -// Tracing - -void block_allocator::_trace_create(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", - this, _reserve_at_beginning, _alignment); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_create_from_blockpairs(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", - this, _reserve_at_beginning, _alignment); - for (uint64_t i = 0; i < _n_blocks; i++) { - fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", - _blocks_array[i].offset, _blocks_array[i].size); - } - fprintf(ba_trace_file, "\n"); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_destroy(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - this, size, heat, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_free(uint64_t 
offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } +void BlockAllocator::Validate() const { + _tree->ValidateBalance(); + _tree->ValidateMhs(); + struct ValidateExtra extra = {0, nullptr}; + _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra); + invariant(extra._bytes == _n_bytes_in_use); } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h index 9b2c1553e7f..648ea9a9ef2 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h @@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "portability/toku_pthread.h" #include "portability/toku_stdint.h" #include "portability/toku_stdlib.h" +#include "ft/serialize/rbtree_mhs.h" // Block allocator. // @@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // The allocation of block numbers is handled elsewhere. // // When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated or freed) +// block at the beginning that is preallocated (and cannot be allocated or +// freed) // // We can allocate blocks of a particular size at a particular location. -// We can allocate blocks of a particular size at a location chosen by the allocator. // We can free blocks. // We can determine the size of a block. - -class block_allocator { -public: +#define MAX_BYTE 0xffffffffffffffff +class BlockAllocator { + public: static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; // How much must be reserved at the beginning for the block? - // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. 
+ // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 + // pointer for each root. // So 4096 should be enough. static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; - - static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == + 0, "block allocator header must have proper alignment"); - static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = + BLOCK_ALLOCATOR_HEADER_RESERVE * 2; - enum allocation_strategy { - BA_STRATEGY_FIRST_FIT = 1, - BA_STRATEGY_BEST_FIT, - BA_STRATEGY_PADDED_FIT, - BA_STRATEGY_HEAT_ZONE + struct BlockPair { + uint64_t _offset; + uint64_t _size; + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + int operator<(const struct BlockPair &rhs) const { + return _offset < rhs._offset; + } + int operator<(const uint64_t &o) const { return _offset < o; } }; - struct blockpair { - uint64_t offset; - uint64_t size; - blockpair(uint64_t o, uint64_t s) : - offset(o), size(s) { - } - int operator<(const struct blockpair &rhs) const { - return offset < rhs.offset; - } - int operator<(const uint64_t &o) const { - return offset < o; - } - }; - - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The default allocation strategy is first fit + // (BA_STRATEGY_FIRST_FIT) // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. 
// alignment (IN) Block alignment. - void create(uint64_t reserve_at_beginning, uint64_t alignment); + void Create(uint64_t reserve_at_beginning, uint64_t alignment); - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) - // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The allocator is initialized to contain `n_blocks' of BlockPairs, + // taken from `pairs' // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters // pairs, unowned array of pairs to copy // n_blocks, Size of pairs array - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. // alignment (IN) Block alignment. - void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks); + void CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *pairs, + uint64_t n_blocks); // Effect: Destroy this block allocator - void destroy(); + void Destroy(); - // Effect: Set the allocation strategy that the allocator should use - // Requires: No other threads are operating on this block allocator - void set_strategy(enum allocation_strategy strategy); - - // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Effect: Allocate a block of the specified size at an address chosen by + // the allocator. // Aborts if anything goes wrong. // The block address will be a multiple of the alignment. // Parameters: - // size (IN): The size of the block. (The size does not have to be aligned.) 
+ // size (IN): The size of the block. (The size does not have to be + // aligned.) // offset (OUT): The location of the block. - // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) - // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary - void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); + // block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), + // but their specific values are arbitrary + void AllocBlock(uint64_t size, uint64_t *offset); // Effect: Free the block at offset. // Requires: There must be a block currently allocated at that offset. // Parameters: // offset (IN): The offset of the block. - void free_block(uint64_t offset); + void FreeBlock(uint64_t offset, uint64_t size); - // Effect: Return the size of the block that starts at offset. - // Requires: There must be a block currently allocated at that offset. - // Parameters: - // offset (IN): The offset of the block. - uint64_t block_size(uint64_t offset); - - // Effect: Check to see if the block allocator is OK. This may take a long time. + // Effect: Check to see if the block allocator is OK. This may take a long + // time. // Usage Hints: Probably only use this for unit tests. // TODO: Private? - void validate() const; + void Validate() const; // Effect: Return the unallocated block address of "infinite" size. - // That is, return the smallest address that is above all the allocated blocks. - uint64_t allocated_limit() const; + // That is, return the smallest address that is above all the allocated + // blocks. + uint64_t AllocatedLimit() const; - // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. + // Effect: Consider the blocks in sorted order. The reserved block at the + // beginning is number 0. The next one is number 1 and so forth. 
// Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. // Rationale: This is probably useful only for tests. - int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size); // Effect: Fill in report to indicate how the file is used. - // Requires: + // Requires: // report->file_size_bytes is filled in // report->data_bytes is filled in // report->checkpoint_bytes_additional is filled in - void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + void UnusedStatistics(TOKU_DB_FRAGMENTATION report); // Effect: Fill in report->data_bytes with the number of bytes in use - // Fill in report->data_blocks with the number of blockpairs in use + // Fill in report->data_blocks with the number of BlockPairs in use // Fill in unused statistics using this->get_unused_statistics() // Requires: // report->file_size is ignored on return // report->checkpoint_bytes_additional is ignored on return - void get_statistics(TOKU_DB_FRAGMENTATION report); + void Statistics(TOKU_DB_FRAGMENTATION report); - // Block allocator tracing. - // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file - // should be written to. 
- // - Trace may be replayed by ba_trace_replay tool in tools/ directory - // eg: "cat mytracefile | ba_trace_replay" - static void maybe_initialize_trace(); - static void maybe_close_trace(); + virtual ~BlockAllocator(){}; -private: - void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); - void grow_blocks_array_by(uint64_t n_to_add); - void grow_blocks_array(); - int64_t find_block(uint64_t offset); - struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); - - // Tracing - toku_mutex_t _trace_lock; - void _trace_create(void); - void _trace_create_from_blockpairs(void); - void _trace_destroy(void); - void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); - void _trace_free(uint64_t offset); + private: + void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment); // How much to reserve at the beginning uint64_t _reserve_at_beginning; @@ -203,12 +181,8 @@ private: uint64_t _alignment; // How many blocks uint64_t _n_blocks; - // How big is the blocks_array. Must be >= n_blocks. - uint64_t _blocks_array_size; - // These blocks are sorted by address. - struct blockpair *_blocks_array; - // Including the reserve_at_beginning uint64_t _n_bytes_in_use; - // The allocation strategy are we using - enum allocation_strategy _strategy; + + // These blocks are sorted by address. + MhsRbTree::Tree *_tree; }; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc deleted file mode 100644 index 62bb8fc4a87..00000000000 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
- - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
- -#include - -#include - -#include "portability/toku_assert.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static uint64_t _align(uint64_t value, uint64_t ba_alignment) { - return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; -} - -static uint64_t _roundup_to_power_of_two(uint64_t value) { - uint64_t r = 4096; - while (r < value) { - r *= 2; - invariant(r > 0); - } - return r; -} - -// First fit block allocation -static struct block_allocator::blockpair * -_first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[0]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp++) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] - invariant(bp - blocks_array < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -static struct block_allocator::blockpair * -_first_fit_bw(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[-1]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp--) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? 
_align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { - invariant(blocks_array - bp < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -struct block_allocator::blockpair * -block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, 0); -} - -// Best fit block allocation -struct block_allocator::blockpair * -block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - struct block_allocator::blockpair *best_bp = nullptr; - uint64_t best_hole_size = 0; - for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { - // Consider the space after blocknum - struct block_allocator::blockpair *bp = &blocks_array[blocknum]; - uint64_t possible_offset = _align(bp->offset + bp->size, alignment); - uint64_t possible_end_offset = possible_offset + size; - if (possible_end_offset <= bp[1].offset) { - // It fits here. Is it the best fit? - uint64_t hole_size = bp[1].offset - possible_end_offset; - if (best_bp == nullptr || hole_size < best_hole_size) { - best_hole_size = hole_size; - best_bp = bp; - } - } - } - return best_bp; -} - -static uint64_t padded_fit_alignment = 4096; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_padded_fit_alignment_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' 
- const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); - if (s != nullptr && strlen(s) > 0) { - const int64_t alignment = strtoll(s, nullptr, 10); - if (alignment <= 0) { - fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " - "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", - s, padded_fit_alignment); - } else { - padded_fit_alignment = _roundup_to_power_of_two(alignment); - fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", - padded_fit_alignment); - } - } -} - -// First fit into a block that is oversized by up to max_padding. -// The hope is that if we purposefully waste a bit of space at allocation -// time we'll be more likely to reuse this block later. -struct block_allocator::blockpair * -block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); -} - -static double hot_zone_threshold = 0.85; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_hot_zone_threshold_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); - if (s != nullptr && strlen(s) > 0) { - const double hot_zone = strtod(s, nullptr); - if (hot_zone < 1 || hot_zone > 99) { - fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " - "but it's out of range (should be an integer 1 through 99). 
defaulting to 85\n", s); - hot_zone_threshold = 85 / 100; - } else { - fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); - hot_zone_threshold = hot_zone / 100; - } - } -} - -struct block_allocator::blockpair * -block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat) { - if (heat > 0) { - struct block_allocator::blockpair *bp, *boundary_bp; - - // Hot allocation. Find the beginning of the hot zone. - boundary_bp = &blocks_array[n_blocks - 1]; - uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); - uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); - - boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); - uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; - uint64_t blocks_outside_zone = boundary_bp - blocks_array; - invariant(blocks_in_zone + blocks_outside_zone == n_blocks); - - if (blocks_in_zone > 0) { - // Find the first fit in the hot zone, going forward. - bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); - if (bp != nullptr) { - return bp; - } - } - if (blocks_outside_zone > 0) { - // Find the first fit in the cold zone, going backwards. - bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); - if (bp != nullptr) { - return bp; - } - } - } else { - // Cold allocations are simply first-fit from the beginning. - return _first_fit(blocks_array, n_blocks, size, alignment, 0); - } - return nullptr; -} diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc index 7101ba9f58c..d2532134d96 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc @@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. 
All rights reserved. #include "ft/ft-internal.h" // TODO: reorganize this dependency (FT-303) -#include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/ft-ops.h" // for toku_maybe_truncate_file #include "ft/serialize/block_table.h" #include "ft/serialize/rbuf.h" #include "ft/serialize/wbuf.h" #include "ft/serialize/block_allocator.h" - #include "util/nb_mutex.h" #include "util/scoped_malloc.h" // indicates the end of a freelist -static const BLOCKNUM freelist_null = { -1 }; +static const BLOCKNUM freelist_null = {-1}; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF size_is_free = (DISKOFF) -1; +static const DISKOFF size_is_free = (DISKOFF)-1; -// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock -static const DISKOFF diskoff_unused = (DISKOFF) -2; +// value of block_translation_pair.u.diskoff if blocknum is used but does not +// yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF)-2; -void block_table::_mutex_lock() { - toku_mutex_lock(&_mutex); -} +void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); } -void block_table::_mutex_unlock() { - toku_mutex_unlock(&_mutex); -} +void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); } // TODO: Move lock to FT void toku_ft_lock(FT ft) { @@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) { bt->_mutex_unlock(); } -// There are two headers: the reserve must fit them both and be suitably aligned. -static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, +// There are two headers: the reserve must fit them both and be suitably +// aligned. 
+static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT == + 0, "Block allocator's header reserve must be suitibly aligned"); -static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == - block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - "Block allocator's total header reserve must exactly fit two headers"); +static_assert( + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); // does NOT initialize the block allocator: the caller is responsible void block_table::_create_internal() { @@ -99,25 +98,30 @@ void block_table::_create_internal() { memset(&_inprogress, 0, sizeof(struct translation)); memset(&_checkpointed, 0, sizeof(struct translation)); memset(&_mutex, 0, sizeof(_mutex)); + _bt_block_allocator = new BlockAllocator(); toku_mutex_init(&_mutex, nullptr); nb_mutex_init(&_safe_file_size_lock); } -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int block_table::create_from_buffer(int fd, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { +// Fill in the checkpointed translation from buffer, and copy checkpointed to +// current. +// The one read from disk is the last known checkpointed one, so we are keeping +// it in +// place and then setting current (which is never stored on disk) for current +// use. +// The translation_buffer has translation only, we create the rest of the +// block_table. 
+int block_table::create_from_buffer( + int fd, + DISKOFF location_on_disk, // Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { // Does not initialize the block allocator _create_internal(); // Deserialize the translation and copy it to current - int r = _translation_deserialize_from_buffer(&_checkpointed, - location_on_disk, size_on_disk, - translation_buffer); + int r = _translation_deserialize_from_buffer( + &_checkpointed, location_on_disk, size_on_disk, translation_buffer); if (r != 0) { return r; } @@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd, invariant(file_size >= 0); _safe_file_size = file_size; - // Gather the non-empty translations and use them to create the block allocator + // Gather the non-empty translations and use them to create the block + // allocator toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * - sizeof(struct block_allocator::blockpair)); - struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + sizeof(struct BlockAllocator::BlockPair)); + struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); uint64_t n_pairs = 0; for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = _checkpointed.block_translation[i]; if (pair.size > 0) { invariant(pair.u.diskoff != diskoff_unused); - pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + pairs[n_pairs++] = + BlockAllocator::BlockPair(pair.u.diskoff, pair.size); } } - _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, - pairs, n_pairs); + _bt_block_allocator->CreateFromBlockPairs( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, + n_pairs); return 0; } @@ -155,8 +163,10 @@ void block_table::create() { _create_internal(); _checkpointed.type = 
TRANSLATION_CHECKPOINTED; - _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.smallest_never_used_blocknum = + make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = + _checkpointed.smallest_never_used_blocknum.b; _checkpointed.blocknum_freelist_head = freelist_null; XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { @@ -164,12 +174,13 @@ void block_table::create() { _checkpointed.block_translation[i].u.diskoff = diskoff_unused; } - // we just created a default checkpointed, now copy it to current. + // we just created a default checkpointed, now copy it to current. _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); // Create an empty block allocator. - _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); + _bt_block_allocator->Create( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT); } // TODO: Refactor with FT-303 @@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) { void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { toku_mutex_assert_locked(&_mutex); - uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); - //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. - if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit(); + // Save a call to toku_os_get_file_size (kernel call) if unlikely to be + // useful. + if (new_size_needed < size_needed_before && + new_size_needed < _safe_file_size) { nb_mutex_lock(&_safe_file_size_lock, &_mutex); // Must hold _safe_file_size_lock to change _safe_file_size. 
if (new_size_needed < _safe_file_size) { int64_t safe_file_size_before = _safe_file_size; - // Not safe to use the 'to-be-truncated' portion until truncate is done. + // Not safe to use the 'to-be-truncated' portion until truncate is + // done. _safe_file_size = new_size_needed; _mutex_unlock(); uint64_t size_after; - toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); + toku_maybe_truncate_file( + fd, new_size_needed, safe_file_size_before, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) { _mutex_unlock(); } -void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { - // We intend to malloc a fresh block, so the incoming translation should be empty +void block_table::_copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be + // empty invariant_null(dst->block_translation); invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); invariant(newtype == TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); + (src->type == TRANSLATION_CURRENT && + newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && + newtype == TRANSLATION_CURRENT)); dst->type = newtype; dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; + dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + // destination btt is of fixed size. Allocate + memcpy the exact length + // necessary. 
dst->length_of_array = dst->smallest_never_used_blocknum.b; XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + memcpy(dst->block_translation, + src->block_translation, + dst->length_of_array * sizeof(*dst->block_translation)); // New version of btt is not yet stored on disk. dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = + diskoff_unused; } int64_t block_table::get_blocks_in_use_unlocked() { @@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() { struct translation *t = &_current; int64_t num_blocks = 0; { - //Reserved blocknums do not get upgraded; They are part of the header. - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + // Reserved blocknums do not get upgraded; They are part of the header. + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size != size_is_free) { num_blocks++; } @@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() { } void block_table::_maybe_optimize_translation(struct translation *t) { - //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just - //on a free list. Doing so requires us to regenerate the free list. - //This is O(n) work, so do it only if you're already doing that. + // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums + // instead of just + // on a free list. Doing so requires us to regenerate the free list. + // This is O(n) work, so do it only if you're already doing that. BLOCKNUM b; paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - //Calculate how large the free suffix is. + // Calculate how large the free suffix is. 
int64_t freed; { - for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { - if (t->block_translation[b.b-1].size != size_is_free) { + for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; + b.b--) { + if (t->block_translation[b.b - 1].size != size_is_free) { break; } } freed = t->smallest_never_used_blocknum.b - b.b; } - if (freed>0) { + if (freed > 0) { t->smallest_never_used_blocknum.b = b.b; - if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { - //We're using more memory than necessary to represent this now. Reduce. + if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) { + // We're using more memory than necessary to represent this now. + // Reduce. uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); t->length_of_array = new_length; - //No need to zero anything out. + // No need to zero anything out. } - //Regenerate free list. + // Regenerate free list. t->blocknum_freelist_head.b = freelist_null.b; - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size == size_is_free) { - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->block_translation[b.b].u.next_free_blocknum = + t->blocknum_freelist_head; + t->blocknum_freelist_head = b; } } } @@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() { } void block_table::note_skipped_checkpoint() { - //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header + // Purpose, alert block translation that the checkpoint was skipped, e.x. 
+ // for a non-dirty header _mutex_lock(); paranoid_invariant_notnull(_inprogress.block_translation); _checkpoint_skipped = true; _mutex_unlock(); } -// Purpose: free any disk space used by previous checkpoint that isn't in use by either +// Purpose: free any disk space used by previous checkpoint that isn't in use by +// either // - current state // - in-progress checkpoint // capture inprogress as new checkpointed. @@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() { void block_table::note_end_checkpoint(int fd) { // Free unused blocks _mutex_lock(); - uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit(); paranoid_invariant_notnull(_inprogress.block_translation); if (_checkpoint_skipped) { toku_free(_inprogress.block_translation); @@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) { goto end; } - //Make certain inprogress was allocated space on disk - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + // Make certain inprogress was allocated space on disk + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > + 0); { struct translation *t = &_checkpointed; for (int64_t i = 0; i < t->length_of_array; i++) { struct block_translation_pair *pair = &t->block_translation[i]; - if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { - assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); - _bt_block_allocator.free_block(pair->u.diskoff); + if (pair->size > 0 && + !_translation_prevents_freeing( + &_inprogress, make_blocknum(i), pair)) { + invariant(!_translation_prevents_freeing( + &_current, make_blocknum(i), pair)); + 
_bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size); } } toku_free(_checkpointed.block_translation); @@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; } -void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_blocknum(t, b)); } -bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { +bool block_table::_is_valid_freeable_blocknum(struct translation *t, + BLOCKNUM b) { invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; } // should be freeable -void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_freeable_blocknum(t, b)); } // Also used only in ft-serialize-test. -void block_table::block_free(uint64_t offset) { +void block_table::block_free(uint64_t offset, uint64_t size) { _mutex_lock(); - _bt_block_allocator.free_block(offset); + _bt_block_allocator->FreeBlock(offset, size); _mutex_unlock(); } int64_t block_table::_calculate_size_on_disk(struct translation *t) { - return 8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4; // 4 for checksum + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum } -// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. 
-bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return t->block_translation && - b.b < t->smallest_never_used_blocknum.b && +// We cannot free the disk space allocated to this blocknum if it is still in +// use by the given translation table. +bool block_table::_translation_prevents_freeing( + struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair) { + return t->block_translation && b.b < t->smallest_never_used_blocknum.b && old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; } -void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { +void block_table::_realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); ft_set_dirty(ft, for_checkpoint); struct translation *t = &_current; struct block_translation_pair old_pair = t->block_translation[b.b]; - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); - if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + (!for_checkpoint && + _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); + if (!cannot_free && old_pair.u.diskoff != diskoff_unused) { + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } uint64_t allocator_offset = diskoff_unused; @@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o if (size > 0) 
{ // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - _bt_block_allocator.alloc_block(size, heat, &allocator_offset); + _bt_block_allocator->AllocBlock(size, &allocator_offset); } t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; - //Update inprogress btt if appropriate (if called because Pending bit is set). + // Update inprogress btt if appropriate (if called because Pending bit is + // set). if (for_checkpoint) { paranoid_invariant(b.b < _inprogress.length_of_array); _inprogress.block_translation[b.b] = t->block_translation[b.b]; } } -void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { +void block_table::_ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset) { // Requires: holding _mutex uint64_t size_needed = block_size + block_offset; if (size_needed > _safe_file_size) { @@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF _mutex_unlock(); int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); + toku_maybe_preallocate_in_file( + fd, size_needed, _safe_file_size, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF } } -void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { +void block_table::realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + int fd, + bool for_checkpoint) { _mutex_lock(); struct translation *t = &_current; _verify_valid_freeable_blocknum(t, b); - _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); _ensure_safe_write_unlocked(fd, size, *offset); _mutex_unlock(); @@ -458,70 +517,97 @@ bool 
block_table::_pair_is_unallocated(struct block_translation_pair *pair) { return pair->size == 0 && pair->u.diskoff == diskoff_unused; } -// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. -// The space must be 512-byte aligned (both the starting address and the size). -// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. +// Effect: figure out where to put the inprogress btt on disk, allocate space +// for it there. +// The space must be 512-byte aligned (both the starting address and the +// size). +// As a result, the allcoated space may be a little bit bigger (up to the next +// 512-byte boundary) than the actual btt. void block_table::_alloc_inprogress_translation_on_disk_unlocked() { toku_mutex_assert_locked(&_mutex); struct translation *t = &_inprogress; paranoid_invariant_notnull(t->block_translation); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - //Each inprogress is allocated only once + // Each inprogress is allocated only once paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b])); - //Allocate a new block + // Allocate a new block int64_t size = _calculate_size_on_disk(t); uint64_t offset; - _bt_block_allocator.alloc_block(size, 0, &offset); + _bt_block_allocator->AllocBlock(size, &offset); t->block_translation[b.b].u.diskoff = offset; - t->block_translation[b.b].size = size; + t->block_translation[b.b].size = size; } // Effect: Serializes the blocktable to a wbuf (which starts uninitialized) -// A clean shutdown runs checkpoint start so that current and inprogress are copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) -// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. 
-// It *is* guaranteed that we can read up to the next 512-byte boundary, however -void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w, - int64_t *address, int64_t *size) { +// A clean shutdown runs checkpoint start so that current and inprogress are +// copies. +// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the +// total length is a multiple of 512 (so we pad with zeros at the end if +// needd) +// The address is guaranteed to be 512-byte aligned, but the size is not +// guaranteed. +// It *is* guaranteed that we can read up to the next 512-byte boundary, +// however +void block_table::serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size) { _mutex_lock(); struct translation *t = &_inprogress; BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy. + _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block + // must be 512-byte + // aligned to make + // O_DIRECT happy. 
uint64_t size_translation = _calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); - assert((int64_t)size_translation==t->block_translation[b.b].size); + uint64_t size_aligned = roundup_to_multiple(512, size_translation); + invariant((int64_t)size_translation == t->block_translation[b.b].size); { - //Init wbuf + // Init wbuf if (0) - printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); + printf( + "%s:%d writing translation table of size_translation %" PRIu64 + " at %" PRId64 "\n", + __FILE__, + __LINE__, + size_translation, + t->block_translation[b.b].u.diskoff); char *XMALLOC_N_ALIGNED(512, size_aligned, buf); - for (uint64_t i=size_translation; ismallest_never_used_blocknum); - wbuf_BLOCKNUM(w, t->blocknum_freelist_head); + wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum); + wbuf_BLOCKNUM(w, t->blocknum_freelist_head); int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { if (0) - printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size); + printf("%s:%d %" PRId64 ",%" PRId64 "\n", + __FILE__, + __LINE__, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); wbuf_DISKOFF(w, t->block_translation[i].u.diskoff); wbuf_DISKOFF(w, t->block_translation[i].size); } uint32_t checksum = toku_x1764_finish(&w->checksum); wbuf_int(w, checksum); *address = t->block_translation[b.b].u.diskoff; - *size = size_translation; - assert((*address)%512 == 0); + *size = size_translation; + invariant((*address) % 512 == 0); _ensure_safe_write_unlocked(fd, size_aligned, *address); _mutex_unlock(); } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) 
-void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) +void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { struct translation *t = &_current; _verify_valid_blocknum(t, b); if (offset) { @@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF } } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) +void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { _mutex_lock(); _translate_blocknum_to_offset_size_unlocked(b, offset, size); _mutex_unlock(); @@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, // given that one more never-used blocknum will soon be used. 
void block_table::_maybe_expand_translation(struct translation *t) { if (t->length_of_array <= t->smallest_never_used_blocknum.b) { - //expansion is necessary + // expansion is necessary uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); uint64_t i; for (i = t->length_of_array; i < new_length; i++) { t->block_translation[i].u.next_free_blocknum = freelist_null; - t->block_translation[i].size = size_is_free; + t->block_translation[i].size = size_is_free; } t->length_of_array = new_length; } @@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { if (t->blocknum_freelist_head.b == freelist_null.b) { // no previously used blocknums are available // use a never used blocknum - _maybe_expand_translation(t); //Ensure a never used blocknums is available + _maybe_expand_translation( + t); // Ensure a never used blocknums is available result = t->smallest_never_used_blocknum; t->smallest_never_used_blocknum.b++; } else { // reuse a previously used blocknum @@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; t->blocknum_freelist_head = next; } - //Verify the blocknum is free + // Verify the blocknum is free paranoid_invariant(t->block_translation[result.b].size == size_is_free); - //blocknum is not free anymore + // blocknum is not free anymore t->block_translation[result.b].u.diskoff = diskoff_unused; - t->block_translation[result.b].size = 0; + t->block_translation[result.b].size = 0; _verify_valid_freeable_blocknum(t, result); *res = result; ft_set_dirty(ft, false); @@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { _mutex_unlock(); } -void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { +void block_table::_free_blocknum_in_translation(struct translation *t, + BLOCKNUM b) { _verify_valid_freeable_blocknum(t, b); 
paranoid_invariant(t->block_translation[b.b].size != size_is_free); - t->block_translation[b.b].size = size_is_free; + t->block_translation[b.b].size = size_is_free; t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->blocknum_freelist_head = b; } // Effect: Free a blocknum. // If the blocknum holds the only reference to a block on disk, free that block -void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = *bp; - bp->b = 0; //Remove caller's reference. + bp->b = 0; // Remove caller's reference. struct block_translation_pair old_pair = _current.block_translation[b.b]; _free_blocknum_in_translation(&_current, b); if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + paranoid_invariant(ft->checkpoint_header->type == + FT_CHECKPOINT_INPROGRESS); _free_blocknum_in_translation(&_inprogress, b); } - //If the size is 0, no disk block has ever been assigned to this blocknum. + // If the size is 0, no disk block has ever been assigned to this blocknum. 
if (old_pair.size > 0) { - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - (_translation_prevents_freeing(&_inprogress, b, &old_pair) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + _translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); if (!cannot_free) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } - } - else { - paranoid_invariant(old_pair.size==0); + } else { + paranoid_invariant(old_pair.size == 0); paranoid_invariant(old_pair.u.diskoff == diskoff_unused); } ft_set_dirty(ft, for_checkpoint); @@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() { void block_table::free_unused_blocknums(BLOCKNUM root) { _mutex_lock(); int64_t smallest = _current.smallest_never_used_blocknum.b; - for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { if (i == root.b) { continue; } BLOCKNUM b = make_blocknum(i); if (_current.block_translation[b.b].size == 0) { - invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + invariant(_current.block_translation[b.b].u.diskoff == + diskoff_unused); _free_blocknum_in_translation(&_current, b); } } @@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { goto cleanup; } } - cleanup: +cleanup: _mutex_unlock(); return ok; } // Verify there are no data blocks except root. -// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. +// TODO(leif): This actually takes a lock, but I don't want to fix all the +// callers right now. 
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) { paranoid_invariant(_no_data_blocks_except_root(root)); } @@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { if (t->block_translation) { BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); - fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b); - fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b); - fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); - fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff); + fprintf(f, + " smallest_never_used_blocknum[%" PRId64 "]", + t->smallest_never_used_blocknum.b); + fprintf(f, + " blocknum_free_list_head[%" PRId64 "]", + t->blocknum_freelist_head.b); + fprintf( + f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); + fprintf(f, + " location_on_disk[%" PRId64 "]\n", + t->block_translation[b.b].u.diskoff); int64_t i; - for (i=0; ilength_of_array; i++) { - fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + for (i = 0; i < t->length_of_array; i++) { + fprintf(f, + " %" PRId64 ": %" PRId64 " %" PRId64 "\n", + i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } fprintf(f, "\n"); } else { @@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { void block_table::dump_translation_table_pretty(FILE *f) { _mutex_lock(); struct translation *t = &_checkpointed; - assert(t->block_translation != nullptr); + invariant(t->block_translation != nullptr); for (int64_t i = 0; i < t->length_of_array; ++i) { - fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + fprintf(f, + "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", 
+ i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } _mutex_unlock(); } @@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) { struct translation *t = &_current; if (b.b < t->length_of_array) { struct block_translation_pair *bx = &t->block_translation[b.b]; - printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); + printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", + b.b, + bx->u.diskoff, + bx->size); } _mutex_unlock(); } @@ -763,26 +877,31 @@ void block_table::destroy(void) { toku_free(_inprogress.block_translation); toku_free(_checkpointed.block_translation); - _bt_block_allocator.destroy(); + _bt_block_allocator->Destroy(); + delete _bt_block_allocator; toku_mutex_destroy(&_mutex); nb_mutex_destroy(&_safe_file_size_lock); } -int block_table::_translation_deserialize_from_buffer(struct translation *t, - DISKOFF location_on_disk, - uint64_t size_on_disk, - // out: buffer with serialized translation - unsigned char *translation_buffer) { +int block_table::_translation_deserialize_from_buffer( + struct translation *t, + DISKOFF location_on_disk, + uint64_t size_on_disk, + // out: buffer with serialized translation + unsigned char *translation_buffer) { int r = 0; - assert(location_on_disk != 0); + invariant(location_on_disk != 0); t->type = TRANSLATION_CHECKPOINTED; // check the checksum uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); uint64_t offset = size_on_disk - 4; - uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); + uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset)); if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + fprintf(stderr, + "Translation table checksum failure: calc=0x%08x read=0x%08x\n", + x1764, + stored_x1764); r = TOKUDB_BAD_CHECKSUM; goto exit; } @@ -790,42 +909,47 @@ int 
block_table::_translation_deserialize_from_buffer(struct translation *t, struct rbuf rb; rb.buf = translation_buffer; rb.ndone = 0; - rb.size = size_on_disk-4;//4==checksum + rb.size = size_on_disk - 4; // 4==checksum - t->smallest_never_used_blocknum = rbuf_blocknum(&rb); + t->smallest_never_used_blocknum = rbuf_blocknum(&rb); t->length_of_array = t->smallest_never_used_blocknum.b; invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rb); + t->blocknum_freelist_head = rbuf_blocknum(&rb); XMALLOC_N(t->length_of_array, t->block_translation); for (int64_t i = 0; i < t->length_of_array; i++) { t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb); t->block_translation[i].size = rbuf_DISKOFF(&rb); } - invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); + invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == + (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == + location_on_disk); exit: return r; } int block_table::iterate(enum translation_type type, - BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only) { struct translation *src; - + int r = 0; switch (type) { - case TRANSLATION_CURRENT: - src = &_current; - break; - case TRANSLATION_INPROGRESS: - src = &_inprogress; - break; - case TRANSLATION_CHECKPOINTED: - src = &_checkpointed; - break; - default: - r = EINVAL; + case TRANSLATION_CURRENT: + src = &_current; + break; + case TRANSLATION_INPROGRESS: + src = &_inprogress; + break; + case TRANSLATION_CHECKPOINTED: + src = &_checkpointed; + break; + default: + r = EINVAL; } struct 
translation fakecurrent; @@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type, src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; _mutex_unlock(); int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = t->block_translation[i]; - if (data_only && i< RESERVED_BLOCKNUMS) continue; - if (used_only && pair.size <= 0) continue; + if (data_only && i < RESERVED_BLOCKNUMS) + continue; + if (used_only && pair.size <= 0) + continue; r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra); - if (r!=0) break; + if (r != 0) + break; } toku_free(t->block_translation); } @@ -856,8 +983,11 @@ typedef struct { int64_t total_space; } frag_extra; -static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { - frag_extra *info = (frag_extra *) extra; +static int frag_helper(BLOCKNUM UU(b), + int64_t size, + int64_t address, + void *extra) { + frag_extra *info = (frag_extra *)extra; if (size + address > info->total_space) info->total_space = size + address; @@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr return 0; } -void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = { 0, 0 }; +void block_table::internal_fragmentation(int64_t *total_sizep, + int64_t *used_sizep) { + frag_extra info = {0, 0}; int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); - assert_zero(r); + invariant_zero(r); - if (total_sizep) *total_sizep = info.total_space; - if (used_sizep) *used_sizep = info.used_space; + if (total_sizep) + *total_sizep = info.total_space; + if (used_sizep) + *used_sizep = info.used_space; } -void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { +void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + FT ft) { 
toku_mutex_assert_locked(&_mutex); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - _realloc_on_disk_internal(b, size, offset, ft, false, 0); + _realloc_on_disk_internal(b, size, offset, ft, false); } -void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { +void block_table::realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + FT ft, + int fd) { _mutex_lock(); _realloc_descriptor_on_disk_unlocked(size, offset, ft); _ensure_safe_write_unlocked(fd, size, *offset); @@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) { void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { // Requires: blocktable lock is held. // Requires: report->file_size_bytes is already filled in. - + // Count the headers. - report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->data_blocks = 1; - report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->checkpoint_bytes_additional = + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->checkpoint_blocks_additional = 1; struct translation *current = &_current; @@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { struct translation *checkpointed = &_checkpointed; for (int64_t i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff)) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; + struct block_translation_pair *pair = + &checkpointed->block_translation[i]; + if (pair->size > 0 && + !(i < current->length_of_array && + current->block_translation[i].size > 0 && + 
current->block_translation[i].u.diskoff == pair->u.diskoff)) { + report->checkpoint_bytes_additional += pair->size; + report->checkpoint_blocks_additional++; } } struct translation *inprogress = &_inprogress; for (int64_t i = 0; i < inprogress->length_of_array; i++) { struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) { + if (pair->size > 0 && + !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff) && + !(i < checkpointed->length_of_array && + checkpointed->block_translation[i].size > 0 && + checkpointed->block_translation[i].u.diskoff == + pair->u.diskoff)) { report->checkpoint_bytes_additional += pair->size; report->checkpoint_blocks_additional++; } } - _bt_block_allocator.get_unused_statistics(report); + _bt_block_allocator->UnusedStatistics(report); } void block_table::get_info64(struct ftinfo64 *s) { @@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) { _mutex_unlock(); } -int block_table::iterate_translation_tables(uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void *iter_extra) { +int block_table::iterate_translation_tables( + uint64_t checkpoint_count, + int (*iter)(uint64_t checkpoint_count, + int64_t total_num_rows, + int64_t blocknum, + int64_t diskoff, + int64_t size, + void *extra), + void *iter_extra) { int error = 0; _mutex_lock(); - int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array; + int64_t total_num_rows = + _current.length_of_array + 
_checkpointed.length_of_array; for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) { struct block_translation_pair *block = &_current.block_translation[i]; - error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + error = iter(checkpoint_count, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &_checkpointed.block_translation[i]; - error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + struct block_translation_pair *block = + &_checkpointed.block_translation[i]; + error = iter(checkpoint_count - 1, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } _mutex_unlock(); diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h index 8d391674540..dd732d4f372 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h @@ -62,13 +62,16 @@ enum { RESERVED_BLOCKNUMS }; -typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, + int64_t size, + int64_t address, + void *extra); static inline BLOCKNUM make_blocknum(int64_t b) { - BLOCKNUM result = { .b = b }; + BLOCKNUM result = {.b = b}; return result; } -static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; +static const BLOCKNUM ROLLBACK_NONE = {.b = 0}; /** * There are three copies of the translation table (btt) in the block table: @@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; * * inprogress Is only filled by copying from current, * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) + * (It is serialized to disk on checkpoint and clean + *shutdown.) 
* At end of checkpoint it replaces 'checkpointed'. * During a checkpoint, any 'pending' dirty writes will update * inprogress. * * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, + * is the only version ever modified while the database is in + *use, * and is the only version ever copied to inprogress. * It is never stored on disk. */ class block_table { -public: + public: enum translation_type { TRANSLATION_NONE = 0, TRANSLATION_CURRENT, @@ -102,7 +107,10 @@ public: void create(); - int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + int create_from_buffer(int fd, + DISKOFF location_on_disk, + DISKOFF size_on_disk, + unsigned char *translation_buffer); void destroy(); @@ -114,11 +122,21 @@ public: // Blocknums void allocate_blocknum(BLOCKNUM *res, struct ft *ft); - void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); + void realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd, + bool for_checkpoint); void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); - void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); void free_unused_blocknums(BLOCKNUM root); - void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd); void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); // External verfication @@ -127,15 +145,22 @@ public: void verify_no_free_blocknums(); // Serialization - void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + void serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size); // 
DEBUG ONLY (ftdump included), tests included void blocknum_dump_translation(BLOCKNUM b); void dump_translation_table_pretty(FILE *f); void dump_translation_table(FILE *f); - void block_free(uint64_t offset); + void block_free(uint64_t offset, uint64_t size); - int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + int iterate(enum translation_type type, + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only); void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); // Requires: blocktable lock is held. @@ -146,13 +171,16 @@ public: void get_info64(struct ftinfo64 *); - int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + int iterate_translation_tables( + uint64_t, + int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), + void *); -private: + private: struct block_translation_pair { // If in the freelist, use next_free_blocknum, otherwise diskoff. union { - DISKOFF diskoff; + DISKOFF diskoff; BLOCKNUM next_free_blocknum; } u; @@ -173,7 +201,8 @@ private: struct translation { enum translation_type type; - // Number of elements in array (block_translation). always >= smallest_never_used_blocknum + // Number of elements in array (block_translation). 
always >= + // smallest_never_used_blocknum int64_t length_of_array; BLOCKNUM smallest_never_used_blocknum; @@ -181,20 +210,28 @@ private: BLOCKNUM blocknum_freelist_head; struct block_translation_pair *block_translation; - // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + // size_on_disk is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff }; void _create_internal(); - int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize - DISKOFF location_on_disk, // location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer); // buffer with serialized translation + int _translation_deserialize_from_buffer( + struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * + translation_buffer); // buffer with serialized translation - void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + void _copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype); void _maybe_optimize_translation(struct translation *t); void _maybe_expand_translation(struct translation *t); - bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + bool _translation_prevents_freeing(struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair); void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b); int64_t _calculate_size_on_disk(struct translation *t); bool _pair_is_unallocated(struct block_translation_pair *pair); @@ -203,14 +240,26 @@ private: // Blocknum management void 
_allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); - void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); - void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); - void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); - void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void _free_blocknum_unlocked(BLOCKNUM *bp, + struct ft *ft, + bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + bool for_checkpoint); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); // File management void _maybe_truncate_file(int fd, uint64_t size_needed_before); - void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + void _ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset); // Verification bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); @@ -220,29 +269,33 @@ private: bool _no_data_blocks_except_root(BLOCKNUM root); bool _blocknum_allocated(BLOCKNUM b); - // Locking + // Locking // // TODO: Move the lock to the FT void _mutex_lock(); void _mutex_unlock(); - // The current translation is the one used by client threads. + // The current translation is the one used by client threads. // It is not represented on disk. struct translation _current; - // The translation used by the checkpoint currently in progress. - // If the checkpoint thread allocates a block, it must also update the current translation. + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the + // current translation. 
struct translation _inprogress; - // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // The translation for the data that shall remain inviolate on disk until + // the next checkpoint finishes, // after which any blocks used only in this translation can be freed. struct translation _checkpointed; - // The in-memory data structure for block allocation. + // The in-memory data structure for block allocation. // There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation* - the block allocator is unaware of which - // blocks are used for which translation, but simply allocates and deallocates blocks. - block_allocator _bt_block_allocator; + // Note: This is *allocation* not *translation* - the block allocator is + // unaware of which + // blocks are used for which translation, but simply allocates and + // deallocates blocks. + BlockAllocator *_bt_block_allocator; toku_mutex_t _mutex; struct nb_mutex _safe_file_size_lock; bool _checkpoint_skipped; @@ -257,16 +310,16 @@ private: #include "ft/serialize/wbuf.h" -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_ulonglong(w, b.b); } -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_nocrc_ulonglong(w, b.b); } static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { - wbuf_ulonglong(wb, (uint64_t) off); + wbuf_ulonglong(wb, (uint64_t)off); } #include "ft/serialize/rbuf.h" @@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { return result; } -static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, + memarena *UU(ma), + BLOCKNUM *blocknum) { *blocknum = rbuf_blocknum(rb); } diff --git 
a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc index 1719b6b7cb5..c2f815c6cf2 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc @@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen, strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; - char windowBits = source[1]; + int8_t windowBits = source[1]; int r = inflateInit2(&strm, windowBits); lazy_assert(r == Z_OK); strm.next_out = dest; diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index 49d4368a3ab..8fcb5293412 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) // translation table itself won't fit in main memory. ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read, translation_address_on_disk); - assert(readsz >= translation_size_on_disk); - assert(readsz <= (ssize_t)size_to_read); + invariant(readsz >= translation_size_on_disk); + invariant(readsz <= (ssize_t)size_to_read); } // Create table and read in data. 
r = ft->blocktable.create_from_buffer(fd, @@ -411,73 +411,90 @@ exit: return r; } -static size_t -serialize_ft_min_size (uint32_t version) { +static size_t serialize_ft_min_size(uint32_t version) { size_t size = 0; - switch(version) { - case FT_LAYOUT_VERSION_29: - size += sizeof(uint64_t); // logrows in ft - case FT_LAYOUT_VERSION_28: - size += sizeof(uint32_t); // fanout in ft - case FT_LAYOUT_VERSION_27: - case FT_LAYOUT_VERSION_26: - case FT_LAYOUT_VERSION_25: - case FT_LAYOUT_VERSION_24: - case FT_LAYOUT_VERSION_23: - case FT_LAYOUT_VERSION_22: - case FT_LAYOUT_VERSION_21: - size += sizeof(MSN); // max_msn_in_ft - case FT_LAYOUT_VERSION_20: - case FT_LAYOUT_VERSION_19: - size += 1; // compression method - size += sizeof(MSN); // highest_unused_msn_for_upgrade - case FT_LAYOUT_VERSION_18: - size += sizeof(uint64_t); // time_of_last_optimize_begin - size += sizeof(uint64_t); // time_of_last_optimize_end - size += sizeof(uint32_t); // count_of_optimize_in_progress - size += sizeof(MSN); // msn_at_start_of_last_completed_optimize - size -= 8; // removed num_blocks_to_upgrade_14 - size -= 8; // removed num_blocks_to_upgrade_13 - case FT_LAYOUT_VERSION_17: - size += 16; - invariant(sizeof(STAT64INFO_S) == 16); - case FT_LAYOUT_VERSION_16: - case FT_LAYOUT_VERSION_15: - size += 4; // basement node size - size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14 - size += 8; // time of last verification - case FT_LAYOUT_VERSION_14: - size += 8; //TXNID that created - case FT_LAYOUT_VERSION_13: - size += ( 4 // build_id - +4 // build_id_original - +8 // time_of_creation - +8 // time_of_last_modification - ); + switch (version) { + case FT_LAYOUT_VERSION_29: + size += sizeof(uint64_t); // logrows in ft + case FT_LAYOUT_VERSION_28: + size += sizeof(uint32_t); // fanout in ft + case FT_LAYOUT_VERSION_27: + case FT_LAYOUT_VERSION_26: + case FT_LAYOUT_VERSION_25: + case FT_LAYOUT_VERSION_24: + case 
FT_LAYOUT_VERSION_23: + case FT_LAYOUT_VERSION_22: + case FT_LAYOUT_VERSION_21: + size += sizeof(MSN); // max_msn_in_ft + case FT_LAYOUT_VERSION_20: + case FT_LAYOUT_VERSION_19: + size += 1; // compression method + size += sizeof(MSN); // highest_unused_msn_for_upgrade + case FT_LAYOUT_VERSION_18: + size += sizeof(uint64_t); // time_of_last_optimize_begin + size += sizeof(uint64_t); // time_of_last_optimize_end + size += sizeof(uint32_t); // count_of_optimize_in_progress + size += sizeof(MSN); // msn_at_start_of_last_completed_optimize + size -= 8; // removed num_blocks_to_upgrade_14 + size -= 8; // removed num_blocks_to_upgrade_13 + case FT_LAYOUT_VERSION_17: + size += 16; + invariant(sizeof(STAT64INFO_S) == 16); + case FT_LAYOUT_VERSION_16: + case FT_LAYOUT_VERSION_15: + size += 4; // basement node size + size += 8; // num_blocks_to_upgrade_14 (previously + // num_blocks_to_upgrade, now one int each for upgrade + // from 13, 14 + size += 8; // time of last verification + case FT_LAYOUT_VERSION_14: + size += 8; // TXNID that created + case FT_LAYOUT_VERSION_13: + size += (4 // build_id + + + 4 // build_id_original + + + 8 // time_of_creation + + + 8 // time_of_last_modification + ); // fall through - case FT_LAYOUT_VERSION_12: - size += (+8 // "tokudata" - +4 // version - +4 // original_version - +4 // size - +8 // byte order verification - +8 // checkpoint_count - +8 // checkpoint_lsn - +4 // tree's nodesize - +8 // translation_size_on_disk - +8 // translation_address_on_disk - +4 // checksum - +8 // Number of blocks in old version. 
- +8 // diskoff - +4 // flags - ); - break; - default: - abort(); + case FT_LAYOUT_VERSION_12: + size += (+8 // "tokudata" + + + 4 // version + + + 4 // original_version + + + 4 // size + + + 8 // byte order verification + + + 8 // checkpoint_count + + + 8 // checkpoint_lsn + + + 4 // tree's nodesize + + + 8 // translation_size_on_disk + + + 8 // translation_address_on_disk + + + 4 // checksum + + + 8 // Number of blocks in old version. + + + 8 // diskoff + + + 4 // flags + ); + break; + default: + abort(); } - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, struct rbuf *rb, uint64_t *checkpoint_count, LSN *checkpoint_lsn, - uint32_t * version_p) + uint32_t *version_p) // Effect: Read and parse the header of a fractalal tree // // Simply reading the raw bytes of the header into an rbuf is insensitive @@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd, // file AND the header is useless { int r = 0; - const int64_t prefix_size = 8 + // magic ("tokudata") - 4 + // version - 4 + // build_id - 4; // size + const int64_t prefix_size = 8 + // magic ("tokudata") + 4 + // version + 4 + // build_id + 4; // size const int64_t read_size = roundup_to_multiple(512, prefix_size); unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); rb->buf = NULL; int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); if (n != read_size) { - if (n==0) { + if (n == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; - } else if (n<0) { + } else if (n < 0) { r = get_error_errno(); } else { r = EINVAL; @@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rbuf_init(rb, prefix, prefix_size); - //Check magic number + // Check magic number const void *magic; rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic,"tokudata",8)!=0) { - if ((*(uint64_t*)magic) == 0) { + if (memcmp(magic, "tokudata", 8) 
!= 0) { + if ((*(uint64_t *)magic) == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = EINVAL; //Not a tokudb file! Do not use. + r = EINVAL; // Not a tokudb file! Do not use. } goto exit; } - //Version MUST be in network order regardless of disk order. + // Version MUST be in network order regardless of disk order. uint32_t version; version = rbuf_network_int(rb); *version_p = version; if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { - r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use + r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use goto exit; } else if (version > FT_LAYOUT_VERSION) { - r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use + r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use goto exit; } - //build_id MUST be in network order regardless of disk order. + // build_id MUST be in network order regardless of disk order. uint32_t build_id __attribute__((__unused__)); build_id = rbuf_network_int(rb); int64_t min_header_size; min_header_size = serialize_ft_min_size(version); - //Size MUST be in network order regardless of disk order. + // Size MUST be in network order regardless of disk order. uint32_t size; size = rbuf_network_int(rb); - //If too big, it is corrupt. We would probably notice during checksum - //but may have to do a multi-gigabyte malloc+read to find out. - //If its too small reading rbuf would crash, so verify. - if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + // If too big, it is corrupt. We would probably notice during checksum + // but may have to do a multi-gigabyte malloc+read to find out. + // If its too small reading rbuf would crash, so verify. 
+ if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE || + size < min_header_size) { r = TOKUDB_DICTIONARY_NO_HEADER; goto exit; } - lazy_assert(rb->ndone==prefix_size); + lazy_assert(rb->ndone == prefix_size); rb->size = size; { toku_free(rb->buf); uint32_t size_to_read = roundup_to_multiple(512, size); XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); - assert(offset_of_header%512==0); + invariant(offset_of_header % 512 == 0); n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); if (n != size_to_read) { if (n < 0) { r = get_error_errno(); } else { - r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. + r = EINVAL; // Header might be useless (wrong size) or could be + // a disk read error. } goto exit; } } - //It's version 14 or later. Magic looks OK. - //We have an rbuf that represents the header. - //Size is within acceptable bounds. + // It's version 14 or later. Magic looks OK. + // We have an rbuf that represents the header. + // Size is within acceptable bounds. 
- //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) + // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function + // changed) uint32_t calculated_x1764; - calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); + calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4); uint32_t stored_x1764; - stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); + stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4)); if (calculated_x1764 != stored_x1764) { - r = TOKUDB_BAD_CHECKSUM; //Header useless - fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; // Header useless + fprintf(stderr, + "Header checksum failure: calc=0x%08x read=0x%08x\n", + calculated_x1764, + stored_x1764); goto exit; } - //Verify byte order + // Verify byte order const void *tmp_byte_order_check; lazy_assert((sizeof toku_byte_order_host) == 8); - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + rbuf_literal_bytes( + rb, &tmp_byte_order_check, 8); // Must not translate byte order int64_t byte_order_stored; - byte_order_stored = *(int64_t*)tmp_byte_order_check; + byte_order_stored = *(int64_t *)tmp_byte_order_check; if (byte_order_stored != toku_byte_order_host) { - r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary + r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary goto exit; } - //Load checkpoint count + // Load checkpoint count *checkpoint_count = rbuf_ulonglong(rb); *checkpoint_lsn = rbuf_LSN(rb); - //Restart at beginning during regular deserialization + // Restart at beginning during regular deserialization rb->ndone = 0; exit: @@ -620,11 +644,7 @@ exit: // Read ft from file into struct. Read both headers and use one. // We want the latest acceptable header whose checkpoint_lsn is no later // than max_acceptable_lsn. 
-int -toku_deserialize_ft_from(int fd, - LSN max_acceptable_lsn, - FT *ft) -{ +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) { struct rbuf rb_0; struct rbuf rb_1; uint64_t checkpoint_count_0 = 0; @@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd, int r0, r1, r; toku_off_t header_0_off = 0; - r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0); + r0 = deserialize_ft_from_fd_into_rbuf(fd, + header_0_off, + &rb_0, + &checkpoint_count_0, + &checkpoint_lsn_0, + &version_0); if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) { h0_acceptable = true; } - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); + toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + r1 = deserialize_ft_from_fd_into_rbuf(fd, + header_1_off, + &rb_1, + &checkpoint_count_1, + &checkpoint_lsn_1, + &version_1); if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { h1_acceptable = true; } @@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd, // We were unable to read either header or at least one is too // new. Certain errors are higher priority than others. Order of // these if/else if is important. 
- if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || + r1 == TOKUDB_DICTIONARY_TOO_NEW) { r = TOKUDB_DICTIONARY_TOO_NEW; - } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) { + } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || + r1 == TOKUDB_DICTIONARY_TOO_OLD) { r = TOKUDB_DICTIONARY_TOO_OLD; } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Both header checksums failed.\n"); r = TOKUDB_BAD_CHECKSUM; - } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) { + } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || + r1 == TOKUDB_DICTIONARY_NO_HEADER) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = r0 ? r0 : r1; //Arbitrarily report the error from the - //first header, unless it's readable + r = r0 ? r0 : r1; // Arbitrarily report the error from the + // first header, unless it's readable } - // it should not be possible for both headers to be later than the max_acceptable_lsn - invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && - (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); - invariant(r!=0); + // it should not be possible for both headers to be later than the + // max_acceptable_lsn + invariant( + !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && + (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); + invariant(r != 0); goto exit; } @@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd, invariant(version_0 >= version_1); rb = &rb_0; version = version_0; - } - else { + } else { invariant(checkpoint_count_1 == checkpoint_count_0 + 1); invariant(version_1 >= version_0); rb = &rb_1; @@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd, } else if (h0_acceptable) { if (r1 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 2 checksum failed, but header 1 ok. 
Proceeding.\n"); + fprintf( + stderr, + "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); } rb = &rb_0; version = version_0; } else if (h1_acceptable) { if (r0 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); + fprintf( + stderr, + "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); } rb = &rb_1; version = version_1; @@ -718,15 +756,13 @@ exit: return r; } - -size_t toku_serialize_ft_size (FT_HEADER h) { +size_t toku_serialize_ft_size(FT_HEADER h) { size_t size = serialize_ft_min_size(h->layout_version); - //There is no dynamic data. - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + // There is no dynamic data. + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } - void toku_serialize_ft_to_wbuf ( struct wbuf *wbuf, FT_HEADER h, @@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf ( } void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { - lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); + lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS); struct wbuf w_translation; int64_t size_translation; int64_t address_translation; // Must serialize translation first, to get address,size for header. - bt->serialize_translation_to_wbuf(fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); + bt->serialize_translation_to_wbuf( + fd, &w_translation, &address_translation, &size_translation); + invariant(size_translation == w_translation.ndone); - // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. - assert(w_translation.size % 512 == 0); + // the number of bytes available in the buffer is 0 mod 512, and those last + // bytes are all initialized. 
+ invariant(w_translation.size % 512 == 0); struct wbuf w_main; - size_t size_main = toku_serialize_ft_size(h); + size_t size_main = toku_serialize_ft_size(h); size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + main_offset = (h->checkpoint_count & 0x1) + ? 0 + : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); toku_free(w_main.buf); toku_free(w_translation.buf); diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index c4f4886b6a0..5914f8a1050 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); - block_allocator::maybe_initialize_trace(); toku_serialize_in_parallel = false; } void toku_ft_serialize_layer_destroy(void) { toku_thread_pool_destroy(&ft_pool); - block_allocator::maybe_close_trace(); } enum { FILE_CHANGE_INCREMENT = (16 << 20) }; @@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node, return 0; } -int -toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { - +int toku_serialize_ftnode_to(int fd, + BLOCKNUM blocknum, + FTNODE node, + FTNODE_DISK_DATA *ndd, + bool do_rebalancing, + FT ft, + bool for_checkpoint) { size_t n_to_write; size_t n_uncompressed_bytes; char *compressed_buf = nullptr; - // because toku_serialize_ftnode_to is only called for + // because toku_serialize_ftnode_to is only called for // in toku_ftnode_flush_callback, we pass false // for in_parallel. 
The reasoning is that when we write - // nodes to disk via toku_ftnode_flush_callback, we + // nodes to disk via toku_ftnode_flush_callback, we // assume that it is being done on a non-critical - // background thread (probably for checkpointing), and therefore + // background thread (probably for checkpointing), and therefore // should not hog CPU, // // Should the above facts change, we may want to revisit @@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA toku_unsafe_fetch(&toku_serialize_in_parallel), &n_to_write, &n_uncompressed_bytes, - &compressed_buf - ); + &compressed_buf); if (r != 0) { return r; } - // If the node has never been written, then write the whole buffer, including the zeros - invariant(blocknum.b>=0); + // If the node has never been written, then write the whole buffer, + // including the zeros + invariant(blocknum.b >= 0); DISKOFF offset; // Dirties the ft - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // Allocations for nodes high in the tree are considered 'hot', - // as they are likely to move again in the next checkpoint. - node->height); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); tokutime_t t1 = toku_time_now(); tokutime_t io_time = t1 - t0; - toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); + toku_ft_status_update_flush_reason( + node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); toku_free(compressed_buf); - node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + node->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again on + // the next checkpoint or eviction. 
return 0; } @@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { bn->seqinsert = orig_bn->seqinsert; bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; bn->stat64_delta = orig_bn->stat64_delta; + bn->logical_rows_delta = orig_bn->logical_rows_delta; bn->data_buffer.clone(&orig_bn->data_buffer); return bn; } @@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) { bn->seqinsert = 0; bn->stale_ancestor_messages_applied = false; bn->stat64_delta = ZEROSTATS; + bn->logical_rows_delta = 0; bn->data_buffer.init_zero(); return bn; } @@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, /* out */ int *layout_version_p); // This function upgrades a version 14 or 13 ftnode to the current -// verison. NOTE: This code assumes the first field of the rbuf has +// version. NOTE: This code assumes the first field of the rbuf has // already been read from the buffer (namely the layout_version of the // ftnode.) 
static int @@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL serialized->blocknum = log->blocknum; } -int -toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT ft, bool for_checkpoint) { +int toku_serialize_rollback_log_to(int fd, + ROLLBACK_LOG_NODE log, + SERIALIZED_ROLLBACK_LOG_NODE serialized_log, + bool is_serialized, + FT ft, + bool for_checkpoint) { size_t n_to_write; char *compressed_buf; struct serialized_rollback_log_node serialized_local; @@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA serialized_log->n_sub_blocks, serialized_log->sub_block, ft->h->compression_method, - &n_to_write, &compressed_buf); + &n_to_write, + &compressed_buf); // Dirties the ft DISKOFF offset; - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // We consider rollback log flushing the hottest possible allocation, - // since rollback logs are short-lived compared to FT nodes. - INT_MAX); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); - log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + log->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again + // on the next checkpoint or eviction. } return 0; } @@ -2704,7 +2711,7 @@ exit: } static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { - // This function exists solely to accomodate future changes in compression. 
+ // This function exists solely to accommodate future changes in compression. int r = 0; if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc new file mode 100644 index 00000000000..922850fb3e0 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc @@ -0,0 +1,833 @@ +/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILIT or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
+ +#include "ft/serialize/rbtree_mhs.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include + +namespace MhsRbTree { + + Tree::Tree() : _root(NULL), _align(1) {} + + Tree::Tree(uint64_t align) : _root(NULL), _align(align) {} + + Tree::~Tree() { Destroy(); } + + void Tree::PreOrder(Node *tree) const { + if (tree != NULL) { + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + PreOrder(tree->_left); + PreOrder(tree->_right); + } + } + + void Tree::PreOrder() { PreOrder(_root); } + + void Tree::InOrder(Node *tree) const { + if (tree != NULL) { + InOrder(tree->_left); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + InOrder(tree->_right); + } + } + + // yeah, i only care about in order visitor. -Jun + void Tree::InOrderVisitor(Node *tree, + void (*f)(void *, Node *, uint64_t), + void *extra, + uint64_t depth) { + if (tree != NULL) { + InOrderVisitor(tree->_left, f, extra, depth + 1); + f(extra, tree, depth); + InOrderVisitor(tree->_right, f, extra, depth + 1); + } + } + + void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t), + void *extra) { + InOrderVisitor(_root, f, extra, 0); + } + + void Tree::InOrder() { InOrder(_root); } + + void Tree::PostOrder(Node *tree) const { + if (tree != NULL) { + PostOrder(tree->_left); + PostOrder(tree->_right); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + } + } + + void Tree::PostOrder() { PostOrder(_root); } + + Node *Tree::SearchByOffset(uint64_t offset) { + Node *x = _root; + while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) { + if (offset < rbn_offset(x).ToInt()) + x = x->_left; + else + x = x->_right; + } + + return x; + } + + // mostly for testing + Node *Tree::SearchFirstFitBySize(uint64_t size) { + if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size && + rbn_right_mhs(_root) < size) { + return nullptr; + } else { + return SearchFirstFitBySizeHelper(_root, size); + } + } + + Node *Tree::SearchFirstFitBySizeHelper(Node *x, 
uint64_t size) { + if (EffectiveSize(x) >= size) { + // only possible to go left + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + else + return x; + } + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + + if (rbn_right_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_right, size); + + // this is an invalid state + Dump(); + ValidateBalance(); + ValidateMhs(); + invariant(0); + return NULL; + } + + Node *Tree::MinNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_left != NULL) + tree = tree->_left; + return tree; + } + + Node *Tree::MinNode() { return MinNode(_root); } + + Node *Tree::MaxNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_right != NULL) + tree = tree->_right; + return tree; + } + + Node *Tree::MaxNode() { return MaxNode(_root); } + + Node *Tree::SuccessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_right)) { + x = y; + y = y->_parent; + } + return y; + } + Node *Tree::Successor(Node *x) { + if (x->_right != NULL) + return MinNode(x->_right); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + Node *Tree::PredecessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_left)) { + x = y; + y = y->_parent; + } + + return y; + } + Node *Tree::Predecessor(Node *x) { + if (x->_left != NULL) + return MaxNode(x->_left); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + /* + * px px + * / / + * x y + * / \ --(left rotation)--> / \ # + * lx y x ry + * / \ / \ + * ly ry lx ly + * max_hole_size updates are pretty local + */ + + void Tree::LeftRotate(Node *&root, Node *x) { + Node *y = x->_right; + + x->_right = y->_left; + rbn_right_mhs(x) = rbn_left_mhs(y); + + if (y->_left != NULL) + y->_left->_parent = x; + + y->_parent = x->_parent; + + if (x->_parent == NULL) { + root = y; + } else { + if (x->_parent->_left == x) { + x->_parent->_left = y; + } else { + x->_parent->_right = y; + } + } + 
y->_left = x; + rbn_left_mhs(y) = mhs_of_subtree(x); + + x->_parent = y; + } + + /* py py + * / / + * y x + * / \ --(right rotate)--> / \ # + * x ry lx y + * / \ / \ # + * lx rx rx ry + * + */ + + void Tree::RightRotate(Node *&root, Node *y) { + Node *x = y->_left; + + y->_left = x->_right; + rbn_left_mhs(y) = rbn_right_mhs(x); + + if (x->_right != NULL) + x->_right->_parent = y; + + x->_parent = y->_parent; + + if (y->_parent == NULL) { + root = x; + } else { + if (y == y->_parent->_right) + y->_parent->_right = x; + else + y->_parent->_left = x; + } + + x->_right = y; + rbn_right_mhs(x) = mhs_of_subtree(y); + y->_parent = x; + } + + // walking from this node up to update the mhs info + // whenver there is change on left/right mhs or size we should recalculate. + // prerequisit: the children of the node are mhs up-to-date. + void Tree::RecalculateMhs(Node *node) { + uint64_t *p_node_mhs = 0; + Node *parent = node->_parent; + + if (!parent) + return; + + uint64_t max_mhs = mhs_of_subtree(node); + if (node == parent->_left) { + p_node_mhs = &rbn_left_mhs(parent); + } else if (node == parent->_right) { + p_node_mhs = &rbn_right_mhs(parent); + } else { + return; + } + if (*p_node_mhs != max_mhs) { + *p_node_mhs = max_mhs; + RecalculateMhs(parent); + } + } + + void Tree::IsNewNodeMergable(Node *pred, + Node *succ, + Node::BlockPair pair, + bool *left_merge, + bool *right_merge) { + if (pred) { + OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred); + if (end_of_pred < pair._offset) + *left_merge = false; + else { + invariant(end_of_pred == pair._offset); + *left_merge = true; + } + } + if (succ) { + OUUInt64 begin_of_succ = rbn_offset(succ); + OUUInt64 end_of_node = pair._offset + pair._size; + if (end_of_node < begin_of_succ) { + *right_merge = false; + } else { + invariant(end_of_node == begin_of_succ); + *right_merge = true; + } + } + } + + void Tree::AbsorbNewNode(Node *pred, + Node *succ, + Node::BlockPair pair, + bool left_merge, + bool right_merge, + bool 
is_right_child) { + invariant(left_merge || right_merge); + if (left_merge && right_merge) { + // merge to the succ + if (!is_right_child) { + rbn_size(succ) += pair._size; + rbn_offset(succ) = pair._offset; + // merge to the pred + rbn_size(pred) += rbn_size(succ); + // to keep the invariant of the tree -no overlapping holes + rbn_offset(succ) += rbn_size(succ); + rbn_size(succ) = 0; + RecalculateMhs(succ); + RecalculateMhs(pred); + // pred dominates succ. this is going to + // update the pred labels separately. + // remove succ + RawRemove(_root, succ); + } else { + rbn_size(pred) += pair._size; + rbn_offset(succ) = rbn_offset(pred); + rbn_size(succ) += rbn_size(pred); + rbn_offset(pred) += rbn_size(pred); + rbn_size(pred) = 0; + RecalculateMhs(pred); + RecalculateMhs(succ); + // now remove pred + RawRemove(_root, pred); + } + } else if (left_merge) { + rbn_size(pred) += pair._size; + RecalculateMhs(pred); + } else if (right_merge) { + rbn_offset(succ) -= pair._size; + rbn_size(succ) += pair._size; + RecalculateMhs(succ); + } + } + // this is the most tedious part, but not complicated: + // 1.find where to insert the pair + // 2.if the pred and succ can merge with the pair. merge with them. either + // pred + // or succ can be removed. + // 3. if only left-mergable or right-mergeable, just merge + // 4. non-mergable case. insert the node and run the fixup. + + int Tree::Insert(Node *&root, Node::BlockPair pair) { + Node *x = _root; + Node *y = NULL; + bool left_merge = false; + bool right_merge = false; + Node *node = NULL; + + while (x != NULL) { + y = x; + if (pair._offset < rbn_key(x)) + x = x->_left; + else + x = x->_right; + } + + // we found where to insert, lets find out the pred and succ for + // possible + // merges. 
+ // node->parent = y; + Node *pred, *succ; + if (y != NULL) { + if (pair._offset < rbn_key(y)) { + // as the left child + pred = PredecessorHelper(y->_parent, y); + succ = y; + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, false); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_left = node; + node->_parent = y; + RecalculateMhs(node); + } + + } else { + // as the right child + pred = y; + succ = SuccessorHelper(y->_parent, y); + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, true); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_right = node; + node->_parent = y; + RecalculateMhs(node); + } + } + } else { + Node::Pair mhsp {0, 0}; + node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + root = node; + } + if (!left_merge && !right_merge) { + invariant_notnull(node); + node->_color = EColor::RED; + return InsertFixup(root, node); + } + return 0; + } + + int Tree::InsertFixup(Node *&root, Node *node) { + Node *parent, *gparent; + while ((parent = rbn_parent(node)) && rbn_is_red(parent)) { + gparent = rbn_parent(parent); + if (parent == gparent->_left) { + { + Node *uncle = gparent->_right; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_right == node) { + Node *tmp; + LeftRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + + rbn_set_black(parent); + rbn_set_red(gparent); + RightRotate(root, gparent); + } else { + { + Node 
*uncle = gparent->_left; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_left == node) { + Node *tmp; + RightRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + rbn_set_black(parent); + rbn_set_red(gparent); + LeftRotate(root, gparent); + } + } + rbn_set_black(root); + return 0; + } + + int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); } + + uint64_t Tree::Remove(size_t size) { + Node *node = SearchFirstFitBySize(size); + return Remove(_root, node, size); + } + + void Tree::RawRemove(Node *&root, Node *node) { + Node *child, *parent; + EColor color; + + if ((node->_left != NULL) && (node->_right != NULL)) { + Node *replace = node; + replace = replace->_right; + while (replace->_left != NULL) + replace = replace->_left; + + if (rbn_parent(node)) { + if (rbn_parent(node)->_left == node) + rbn_parent(node)->_left = replace; + else + rbn_parent(node)->_right = replace; + } else { + root = replace; + } + child = replace->_right; + parent = rbn_parent(replace); + color = rbn_color(replace); + + if (parent == node) { + parent = replace; + } else { + if (child) + rbn_parent(child) = parent; + + parent->_left = child; + rbn_left_mhs(parent) = rbn_right_mhs(replace); + RecalculateMhs(parent); + replace->_right = node->_right; + rbn_set_parent(node->_right, replace); + rbn_right_mhs(replace) = rbn_right_mhs(node); + } + + replace->_parent = node->_parent; + replace->_color = node->_color; + replace->_left = node->_left; + rbn_left_mhs(replace) = rbn_left_mhs(node); + node->_left->_parent = replace; + RecalculateMhs(replace); + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + return; + } + + if (node->_left != NULL) + child = node->_left; + else + child = node->_right; + + parent = node->_parent; + color = node->_color; + + if (child) + child->_parent = parent; + + if (parent) { + if 
(parent->_left == node) { + parent->_left = child; + rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } else { + parent->_right = child; + rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } + RecalculateMhs(parent); + } else + root = child; + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + } + + void Tree::RawRemove(uint64_t offset) { + Node *node = SearchByOffset(offset); + RawRemove(_root, node); + } + static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; + } + uint64_t Tree::Remove(Node *&root, Node *node, size_t size) { + OUUInt64 n_offset = rbn_offset(node); + OUUInt64 n_size = rbn_size(node); + OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align)); + + invariant((answer_offset + size) <= (n_offset + n_size)); + if (answer_offset == n_offset) { + rbn_offset(node) += size; + rbn_size(node) -= size; + RecalculateMhs(node); + if (rbn_size(node) == 0) { + RawRemove(root, node); + } + + } else { + if (answer_offset + size == n_offset + n_size) { + rbn_size(node) -= size; + RecalculateMhs(node); + } else { + // well, cut in the middle... 
+ rbn_size(node) = answer_offset - n_offset; + RecalculateMhs(node); + Insert(_root, + {(answer_offset + size), + (n_offset + n_size) - (answer_offset + size)}); + } + } + return answer_offset.ToInt(); + } + + void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) { + Node *other; + while ((!node || rbn_is_black(node)) && node != root) { + if (parent->_left == node) { + other = parent->_right; + if (rbn_is_red(other)) { + // Case 1: the brother of X, w, is read + rbn_set_black(other); + rbn_set_red(parent); + LeftRotate(root, parent); + other = parent->_right; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both of w's children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_right || rbn_is_black(other->_right)) { + // Case 3: w is black and left child of w is red but + // right + // child is black + rbn_set_black(other->_left); + rbn_set_red(other); + RightRotate(root, other); + other = parent->_right; + } + // Case 4: w is black and right child of w is red, + // regardless of + // left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_right); + LeftRotate(root, parent); + node = root; + break; + } + } else { + other = parent->_left; + if (rbn_is_red(other)) { + // Case 1: w is red + rbn_set_black(other); + rbn_set_red(parent); + RightRotate(root, parent); + other = parent->_left; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_left || rbn_is_black(other->_left)) { + // Case 3: w is black and left child of w is red whereas + // right child is black + rbn_set_black(other->_right); + rbn_set_red(other); + LeftRotate(root, other); + other = 
parent->_left; + } + // Case 4:w is black and right child of w is red, regardless + // of + // the left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_left); + RightRotate(root, parent); + node = root; + break; + } + } + } + if (node) + rbn_set_black(node); + } + + void Tree::Destroy(Node *&tree) { + if (tree == NULL) + return; + + if (tree->_left != NULL) + Destroy(tree->_left); + if (tree->_right != NULL) + Destroy(tree->_right); + + delete tree; + tree = NULL; + } + + void Tree::Destroy() { Destroy(_root); } + + void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) { + if (tree != NULL) { + if (dir == EDirection::NONE) + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64 + "))(B) is root\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree)); + else + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64 + "))(%c) is %" PRIu64 "'s %s\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree), + rbn_is_red(tree) ? 'R' : 'B', + pair._offset.ToInt(), + dir == EDirection::RIGHT ? 
"right child" : "left child"); + + Dump(tree->_left, tree->_hole, EDirection::LEFT); + Dump(tree->_right, tree->_hole, EDirection::RIGHT); + } + } + + uint64_t Tree::EffectiveSize(Node *node) { + OUUInt64 offset = rbn_offset(node); + OUUInt64 size = rbn_size(node); + OUUInt64 end = offset + size; + OUUInt64 aligned_offset(align(offset.ToInt(), _align)); + if (aligned_offset > end) { + return 0; + } + return (end - aligned_offset).ToInt(); + } + + void Tree::Dump() { + if (_root != NULL) + Dump(_root, _root->_hole, (EDirection)0); + } + + static void vis_bal_f(void *extra, Node *node, uint64_t depth) { + uint64_t **p = (uint64_t **)extra; + uint64_t min = *p[0]; + uint64_t max = *p[1]; + if (node->_left) { + Node *left = node->_left; + invariant(node == left->_parent); + } + + if (node->_right) { + Node *right = node->_right; + invariant(node == right->_parent); + } + + if (!node->_left || !node->_right) { + if (min > depth) { + *p[0] = depth; + } else if (max < depth) { + *p[1] = depth; + } + } + } + + void Tree::ValidateBalance() { + uint64_t min_depth = 0xffffffffffffffff; + uint64_t max_depth = 0; + if (!_root) { + return; + } + uint64_t *p[2] = {&min_depth, &max_depth}; + InOrderVisitor(vis_bal_f, (void *)p); + invariant((min_depth + 1) * 2 >= max_depth + 1); + } + + static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) { + Node::BlockPair **p = (Node::BlockPair **)extra; + + invariant_notnull(*p); + invariant((*p)->_offset == node->_hole._offset); + + *p = *p + 1; + } + + // validate the input pairs matches with sorted pairs + void Tree::ValidateInOrder(Node::BlockPair *pairs) { + InOrderVisitor(vis_cmp_f, &pairs); + } + + uint64_t Tree::ValidateMhs(Node *node) { + if (!node) + return 0; + else { + uint64_t mhs_left = ValidateMhs(node->_left); + uint64_t mhs_right = ValidateMhs(node->_right); + if (mhs_left != rbn_left_mhs(node)) { + printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left); + Dump(node, node->_hole, (EDirection)0); + } + 
invariant(mhs_left == rbn_left_mhs(node)); + + if (mhs_right != rbn_right_mhs(node)) { + printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right); + Dump(node, node->_hole, (EDirection)0); + } + invariant(mhs_right == rbn_right_mhs(node)); + return std::max(EffectiveSize(node), std::max(mhs_left, mhs_right)); + } + } + + void Tree::ValidateMhs() { + if (!_root) + return; + uint64_t mhs_left = ValidateMhs(_root->_left); + uint64_t mhs_right = ValidateMhs(_root->_right); + invariant(mhs_left == rbn_left_mhs(_root)); + invariant(mhs_right == rbn_right_mhs(_root)); + } + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h new file mode 100644 index 00000000000..92f1e278e1a --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h @@ -0,0 +1,351 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. 
+ + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include + +#include "portability/toku_pthread.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +// RBTree(Red-black tree) with max hole sizes for subtrees. + +// This is a tentative data struct to improve the block allocation time +// complexity from the linear time to the log time. Please be noted this DS only +// supports first-fit for now. It is actually easier to do it with +// best-fit.(just +// sort by size). + +// RBTree is a classic data struct with O(log(n)) for insertion, deletion and +// search. Many years have seen its efficiency. + +// a *hole* is the representation of an available BlockPair for allocation. +// defined as (start_address,size) or (offset, size) interchangably. + +// each node has a *label* to indicate a pair of the max hole sizes for its +// subtree. + +// We are implementing a RBTree with max hole sizes for subtree. It is a red +// black tree that is sorted by the start_address but also labeld with the max +// hole sizes of the subtrees. + +// [(6,3)] -> [(offset, size)], the hole +// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label +/* / \ */ +// [(0, 1)] [(10, 5)] +// [{0, 2}] [{0, 0}] +/* \ */ +// [(3, 2)] +// [{0, 0}] +// request of allocation size=2 goes from root to [(3,2)]. + +// above example shows a simplified RBTree_max_holes. +// it is easier to tell the search time is O(log(n)) as we can make a decision +// on each descent until we get to the target. 
+ +// the only question is if we can keep the maintenance cost low -- and i think +// it is not a problem becoz an insertion/deletion is only going to update the +// max_hole_sizes of the nodes along the path from the root to the node to be +// deleted/inserted. The path can be cached and search is anyway O(log(n)). + +// unlike the typical rbtree, Tree has to handle the inserts and deletes +// with more care: an allocation that triggers the delete might leave some +// unused space which we can simply update the start_addr and size without +// worrying overlapping. An free might not only mean the insertion but also +// *merging* with the adjacent holes. + +namespace MhsRbTree { + +#define offset_t uint64_t + enum class EColor { RED, BLACK }; + enum class EDirection { NONE = 0, LEFT, RIGHT }; + + // I am a bit tired of fixing overflow/underflow, just quickly craft some + // int + // class that has an infinity-like max value and prevents overflow and + // underflow. If you got a file offset larger than MHS_MAX_VAL, it is not + // a problem here. 
:-/ - JYM + class OUUInt64 { + public: + static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff; + OUUInt64() : _value(0) {} + OUUInt64(uint64_t s) : _value(s) {} + bool operator<(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value < r.ToInt(); + } + bool operator>(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value > r.ToInt(); + } + bool operator<=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value <= r.ToInt(); + } + bool operator>=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value >= r.ToInt(); + } + OUUInt64 operator+(const OUUInt64 &r) const { + if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) { + OUUInt64 tmp(MHS_MAX_VAL); + return tmp; + } else { + // detecting overflow + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + uint64_t plus = _value + r.ToInt(); + OUUInt64 tmp(plus); + return tmp; + } + } + OUUInt64 operator-(const OUUInt64 &r) const { + invariant(r.ToInt() != MHS_MAX_VAL); + if (_value == MHS_MAX_VAL) { + return *this; + } else { + invariant(_value >= r.ToInt()); + uint64_t minus = _value - r.ToInt(); + OUUInt64 tmp(minus); + return tmp; + } + } + OUUInt64 operator-=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + invariant(r.ToInt() != MHS_MAX_VAL); + invariant(_value >= r.ToInt()); + _value -= r.ToInt(); + } + return *this; + } + OUUInt64 operator+=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + if (r.ToInt() == MHS_MAX_VAL) { + _value = MHS_MAX_VAL; + } else { + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + this->_value += r.ToInt(); + } + } + return *this; + } + bool operator==(const OUUInt64 &r) const { + return _value == r.ToInt(); + } + bool operator!=(const OUUInt64 &r) const { + return _value != r.ToInt(); + } + OUUInt64 operator=(const OUUInt64 &r) { + _value = r.ToInt(); + return 
*this; + } + uint64_t ToInt() const { return _value; } + + private: + uint64_t _value; + }; + + class Node { + public: + struct BlockPair { + OUUInt64 _offset; + OUUInt64 _size; + + BlockPair() : _offset(0), _size(0) {} + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + + BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} + int operator<(const struct BlockPair &rhs) const { + return _offset < rhs._offset; + } + int operator<(const uint64_t &o) const { return _offset < o; } + }; + + struct Pair { + uint64_t _left; + uint64_t _right; + Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {} + }; + + EColor _color; + struct BlockPair _hole; + struct Pair _label; + Node *_left; + Node *_right; + Node *_parent; + + Node(EColor c, + Node::BlockPair h, + struct Pair lb, + Node *l, + Node *r, + Node *p) + : _color(c), + _hole(h), + _label(lb), + _left(l), + _right(r), + _parent(p) {} + }; + + class Tree { + private: + Node *_root; + uint64_t _align; + + public: + Tree(); + Tree(uint64_t); + ~Tree(); + + void PreOrder(); + void InOrder(); + void PostOrder(); + // immutable operations + Node *SearchByOffset(uint64_t addr); + Node *SearchFirstFitBySize(uint64_t size); + + Node *MinNode(); + Node *MaxNode(); + + Node *Successor(Node *); + Node *Predecessor(Node *); + + // mapped from tree_allocator::free_block + int Insert(Node::BlockPair pair); + // mapped from tree_allocator::alloc_block + uint64_t Remove(size_t size); + // mapped from tree_allocator::alloc_block_after + + void RawRemove(uint64_t offset); + void Destroy(); + // print the tree + void Dump(); + // validation + // balance + void ValidateBalance(); + void ValidateInOrder(Node::BlockPair *); + void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *); + void ValidateMhs(); + + private: + void PreOrder(Node *node) const; + void InOrder(Node *node) const; + void PostOrder(Node *node) const; + Node *SearchByOffset(Node *node, offset_t addr) const; + Node *SearchFirstFitBySize(Node *node, 
size_t size) const; + + Node *MinNode(Node *node); + Node *MaxNode(Node *node); + + // rotations to fix up. we will have to update the labels too. + void LeftRotate(Node *&root, Node *x); + void RightRotate(Node *&root, Node *y); + + int Insert(Node *&root, Node::BlockPair pair); + int InsertFixup(Node *&root, Node *node); + + void RawRemove(Node *&root, Node *node); + uint64_t Remove(Node *&root, Node *node, size_t size); + void RawRemoveFixup(Node *&root, Node *node, Node *parent); + + void Destroy(Node *&tree); + void Dump(Node *tree, Node::BlockPair pair, EDirection dir); + void RecalculateMhs(Node *node); + void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *); + void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool); + Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size); + + Node *SuccessorHelper(Node *y, Node *x); + + Node *PredecessorHelper(Node *y, Node *x); + + void InOrderVisitor(Node *, + void (*f)(void *, Node *, uint64_t), + void *, + uint64_t); + uint64_t ValidateMhs(Node *); + + uint64_t EffectiveSize(Node *); +// mixed with some macros..... 
+#define rbn_parent(r) ((r)->_parent) +#define rbn_color(r) ((r)->_color) +#define rbn_is_red(r) ((r)->_color == EColor::RED) +#define rbn_is_black(r) ((r)->_color == EColor::BLACK) +#define rbn_set_black(r) \ + do { \ + (r)->_color = EColor::BLACK; \ + } while (0) +#define rbn_set_red(r) \ + do { \ + (r)->_color = EColor::RED; \ + } while (0) +#define rbn_set_parent(r, p) \ + do { \ + (r)->_parent = (p); \ + } while (0) +#define rbn_set_color(r, c) \ + do { \ + (r)->_color = (c); \ + } while (0) +#define rbn_set_offset(r, c) \ + do { \ + (r)->_hole._offset = (c); \ + } while (0) +#define rbn_set_size(r, c) \ + do { \ + (r)->_hole._size = (c); \ + } while (0) +#define rbn_set_left_mhs(r, c) \ + do { \ + (r)->_label._left = (c); \ + } while (0) +#define rbn_set_right_mhs(r, c) \ + do { \ + (r)->_label._right = (c); \ + } while (0) +#define rbn_size(r) ((r)->_hole._size) +#define rbn_offset(r) ((r)->_hole._offset) +#define rbn_key(r) ((r)->_hole._offset) +#define rbn_left_mhs(r) ((r)->_label._left) +#define rbn_right_mhs(r) ((r)->_label._right) +#define mhs_of_subtree(y) \ + (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y))) + }; + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc deleted file mode 100644 index 3670ef81cc2..00000000000 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. 
- - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
- -#include "ft/tests/test.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static const uint64_t alignment = 4096; - -static void test_first_vs_best_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 6 * alignment), - // hole between 7x align -> 8x align - block_allocator::blockpair(8 * alignment, 4 * alignment), - // hole between 12x align -> 16x align - block_allocator::blockpair(16 * alignment, 1 * alignment), - block_allocator::blockpair(17 * alignment, 2 * alignment), - // hole between 19 align -> 21x align - block_allocator::blockpair(21 * alignment, 2 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // first fit - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); - - // best fit - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment); - assert(bp == &pairs[3]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); -} - -static void test_padded_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 1 * alignment), - // 4096 byte hole after bp[0] - block_allocator::blockpair(3 * alignment, 1 * alignment), - // 8192 byte hole after bp[1] - block_allocator::blockpair(6 * alignment, 1 * alignment), - // 16384 byte hole after bp[2] - 
block_allocator::blockpair(11 * alignment, 1 * alignment), - // 32768 byte hole after bp[3] - block_allocator::blockpair(17 * alignment, 1 * alignment), - // 116kb hole after bp[4] - block_allocator::blockpair(113 * alignment, 1 * alignment), - // 256kb hole after bp[5] - block_allocator::blockpair(371 * alignment, 1 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // padding for a 100 byte allocation will be < than standard alignment, - // so it should fit in the first 4096 byte hole. - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); - assert(bp == &pairs[0]); - - // Even padded, a 12kb alloc will fit in a 16kb hole - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); - assert(bp == &pairs[2]); - - // would normally fit in the 116kb hole but the padding will bring it over - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); - assert(bp == &pairs[5]); - - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); - assert(bp == &pairs[5]); -} - -int test_main(int argc, const char *argv[]) { - (void) argc; - (void) argv; - - test_first_vs_best_fit(); - test_padded_fit(); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc index d80ee83cbc9..3eff52b915d 100644 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc @@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "test.h" -static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { - ba->validate(); +static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) { + ba->Validate(); uint64_t actual_answer; - const uint64_t heat = random() % 2; - ba->alloc_block(512 * size, heat, &actual_answer); - ba->validate(); + ba->AllocBlock(512 * size, &actual_answer); + ba->Validate(); - assert(actual_answer%512==0); - *answer = actual_answer/512; + invariant(actual_answer % 512 == 0); + *answer = actual_answer / 512; } -static void ba_free(block_allocator *ba, uint64_t offset) { - ba->validate(); - ba->free_block(offset * 512); - ba->validate(); +static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) { + ba->Validate(); + ba->FreeBlock(offset * 512, 512 * size); + ba->Validate(); } -static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, - uint64_t expected_offset, uint64_t expected_size) { +static void ba_check_l(BlockAllocator *ba, + uint64_t blocknum_in_layout_order, + uint64_t expected_offset, + uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==0); - assert(expected_offset*512 == actual_offset); - assert(expected_size *512 == actual_size); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + invariant(r == 0); + invariant(expected_offset * 512 == actual_offset); + invariant(expected_size * 512 == actual_size); } -static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { +static void ba_check_none(BlockAllocator *ba, + uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==-1); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + 
invariant(r == -1); } - // Simple block allocator test -static void test_ba0(block_allocator::allocation_strategy strategy) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(100*512, 1*512); - ba->set_strategy(strategy); - assert(ba->allocated_limit()==100*512); +static void test_ba0() { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(100 * 512, 1 * 512); + invariant(ba->AllocatedLimit() == 100 * 512); uint64_t b2, b3, b4, b5, b6, b7; - ba_alloc(ba, 100, &b2); - ba_alloc(ba, 100, &b3); - ba_alloc(ba, 100, &b4); - ba_alloc(ba, 100, &b5); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b2); - ba_alloc(ba, 100, &b2); - ba_free(ba, b4); - ba_free(ba, b6); + ba_alloc(ba, 100, &b2); + ba_alloc(ba, 100, &b3); + ba_alloc(ba, 100, &b4); + ba_alloc(ba, 100, &b5); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b2); + ba_free(ba, b4, 100); + ba_free(ba, b6, 100); uint64_t b8, b9; - ba_alloc(ba, 100, &b4); - ba_free(ba, b2); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b8); - ba_alloc(ba, 100, &b9); - ba_free(ba, b6); - ba_free(ba, b7); - ba_free(ba, b8); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b4); - ba_alloc(ba, 100, &b4); + ba_alloc(ba, 100, &b4); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b8); + ba_alloc(ba, 100, &b9); + ba_free(ba, b6, 100); + ba_free(ba, b7, 100); + ba_free(ba, b8, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b4, 100); + ba_alloc(ba, 100, &b4); - ba->destroy(); + ba->Destroy(); } // Manually to get coverage of all the code in the block allocator. 
-static void -test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(0*512, 1*512); - ba->set_strategy(strategy); +static void test_ba1(int n_initial) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(0 * 512, 1 * 512); - int n_blocks=0; + int n_blocks = 0; uint64_t blocks[1000]; for (int i = 0; i < 1000; i++) { - if (i < n_initial || random() % 2 == 0) { - if (n_blocks < 1000) { - ba_alloc(ba, 1, &blocks[n_blocks]); - //printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); - n_blocks++; - } - } else { - if (n_blocks > 0) { - int blocknum = random()%n_blocks; - //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); - ba_free(ba, blocks[blocknum]); - blocks[blocknum]=blocks[n_blocks-1]; - n_blocks--; - } - } + if (i < n_initial || random() % 2 == 0) { + if (n_blocks < 1000) { + ba_alloc(ba, 1, &blocks[n_blocks]); + // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); + n_blocks++; + } + } else { + if (n_blocks > 0) { + int blocknum = random() % n_blocks; + // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]); + ba_free(ba, blocks[blocknum], 1); + blocks[blocknum] = blocks[n_blocks - 1]; + n_blocks--; + } + } } - - ba->destroy(); + + ba->Destroy(); } - + // Check to see if it is first fit or best fit. 
-static void -test_ba2 (void) -{ - block_allocator allocator; - block_allocator *ba = &allocator; +static void test_ba2(void) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - ba->create(100*512, BSIZE*512); - ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); - assert(ba->allocated_limit()==100*512); + ba->Create(100 * 512, BSIZE * 512); + invariant(ba->AllocatedLimit() == 100 * 512); - ba_check_l (ba, 0, 0, 100); - ba_check_none (ba, 1); + ba_check_l(ba, 0, 0, 100); + ba_check_none(ba, 1); - ba_alloc (ba, 100, &b[0]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_none (ba, 2); + ba_alloc(ba, 100, &b[0]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_none(ba, 2); - ba_alloc (ba, BSIZE + 100, &b[1]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_none (ba, 3); + ba_alloc(ba, BSIZE + 100, &b[1]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_none(ba, 3); - ba_alloc (ba, 100, &b[2]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_none (ba, 4); + ba_alloc(ba, 100, &b[2]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_none(ba, 4); - ba_alloc (ba, 100, &b[3]); - ba_alloc (ba, 100, &b[4]); - ba_alloc (ba, 100, &b[5]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); - - ba_free (ba, 4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 
2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 5*BSIZE, 100); - ba_check_l (ba, 4, 6*BSIZE, 100); - ba_check_l (ba, 5, 7*BSIZE, 100); - ba_check_none (ba, 6); + ba_alloc(ba, 100, &b[3]); + ba_alloc(ba, 100, &b[4]); + ba_alloc(ba, 100, &b[5]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); + + ba_free(ba, 4 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 5 * BSIZE, 100); + ba_check_l(ba, 4, 6 * BSIZE, 100); + ba_check_l(ba, 5, 7 * BSIZE, 100); + ba_check_none(ba, 6); uint64_t b2; ba_alloc(ba, 100, &b2); - assert(b2==4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); + invariant(b2 == 4 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); - ba_free (ba, BSIZE); - ba_free (ba, 5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 2, 4*BSIZE, 100); - ba_check_l (ba, 3, 6*BSIZE, 100); - ba_check_l (ba, 4, 7*BSIZE, 100); - ba_check_none (ba, 5); + ba_free(ba, BSIZE, 100); + ba_free(ba, 5 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 2, 4 * BSIZE, 100); + ba_check_l(ba, 3, 6 * BSIZE, 100); + ba_check_l(ba, 4, 7 * BSIZE, 100); + ba_check_none(ba, 5); - // This alloc will allocate the 
first block after the reserve space in the case of first fit. + // This alloc will allocate the first block after the reserve space in the + // case of first fit. uint64_t b3; ba_alloc(ba, 100, &b3); - assert(b3== BSIZE); // First fit. + invariant(b3 == BSIZE); // First fit. // if (b3==5*BSIZE) then it is next fit. // Now 5*BSIZE is free uint64_t b5; ba_alloc(ba, 100, &b5); - assert(b5==5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); + invariant(b5 == 5 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); // Now all blocks are busy uint64_t b6, b7, b8; ba_alloc(ba, 100, &b6); ba_alloc(ba, 100, &b7); ba_alloc(ba, 100, &b8); - assert(b6==8*BSIZE); - assert(b7==9*BSIZE); - assert(b8==10*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_l (ba, 7, 8*BSIZE, 100); - ba_check_l (ba, 8, 9*BSIZE, 100); - ba_check_l (ba, 9, 10*BSIZE, 100); - ba_check_none (ba, 10); - - ba_free(ba, 9*BSIZE); - ba_free(ba, 7*BSIZE); + invariant(b6 == 8 * BSIZE); + invariant(b7 == 9 * BSIZE); + invariant(b8 == 10 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_l(ba, 7, 8 * BSIZE, 100); + 
ba_check_l(ba, 8, 9 * BSIZE, 100); + ba_check_l(ba, 9, 10 * BSIZE, 100); + ba_check_none(ba, 10); + + ba_free(ba, 9 * BSIZE, 100); + ba_free(ba, 7 * BSIZE, 100); uint64_t b9; ba_alloc(ba, 100, &b9); - assert(b9==7*BSIZE); + invariant(b9 == 7 * BSIZE); - ba_free(ba, 5*BSIZE); - ba_free(ba, 2*BSIZE); + ba_free(ba, 5 * BSIZE, 100); + ba_free(ba, 2 * BSIZE, BSIZE + 100); uint64_t b10, b11; ba_alloc(ba, 100, &b10); - assert(b10==2*BSIZE); + invariant(b10 == 2 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==3*BSIZE); + invariant(b11 == 3 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==5*BSIZE); + invariant(b11 == 5 * BSIZE); - ba->destroy(); + ba->Destroy(); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - enum block_allocator::allocation_strategy strategies[] = { - block_allocator::BA_STRATEGY_FIRST_FIT, - block_allocator::BA_STRATEGY_BEST_FIT, - block_allocator::BA_STRATEGY_PADDED_FIT, - block_allocator::BA_STRATEGY_HEAT_ZONE, - }; - for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) { - test_ba0(strategies[i]); - test_ba1(strategies[i], 0); - test_ba1(strategies[i], 10); - test_ba1(strategies[i], 20); - } +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { + test_ba0(); + test_ba1(0); + test_ba1(10); + test_ba1(20); test_ba2(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc index a7c48ef709a..ee68ab3ef0b 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc @@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // #5978 is fixed. Here is what we do. We have four pairs with // blocknums and fullhashes of 1,2,3,4. The cachetable has only // two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4. 
-// We pin all four with expensive write locks. Then, on backgroud threads, +// We pin all four with expensive write locks. Then, on background threads, // we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and // we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this // enough times, and we should see a deadlock before the fix, and no deadlock diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc index be4bae898be..51cf70c3e76 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc @@ -77,7 +77,7 @@ flush ( // // test the following things for simple cloning: -// - verifies that after teh checkpoint ends, the PAIR is properly +// - verifies that after the checkpoint ends, the PAIR is properly // dirty or clean based on the second unpin // static void diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc index cb03a23e0fc..7abd2267a7e 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc @@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "test.h" -static int -int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { - int64_t x = *(int64_t *) a->data; - int64_t y = *(int64_t *) b->data; +static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) { + int64_t x = *(int64_t *)a->data; + int64_t y = *(int64_t *)b->data; - if (xy) return 1; + if (x < y) + return -1; + if (x > y) + return 1; return 0; } -static void -test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - bfe.create_for_prefetch( ft_h, cursor); + bfe.create_for_prefetch(ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + 
invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t)); cursor->left_is_neg_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, 
make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - 
assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, 
make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); 
bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { toku_free(cursor); } -static void -test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - + uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; @@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { ftnode_fetch_extra bfe; bfe.create_for_subset_read( - ft_h, - NULL, - &left, - &right, - false, - false, - false, - false - ); - + ft_h, NULL, &left, &right, false, false, false, false); + // fake the childnum to read // set disable_prefetching ON bfe.child_to_read = 2; bfe.disable_prefetching = true; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = 
toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 2; bfe.disable_prefetching = false; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) 
== PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 0; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == 
PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); toku_ftnode_free(&dn); toku_free(ndd); toku_free(cursor); } - -static void -test_prefetching(void) { +static void test_prefetching(void) { // struct ft_handle source_ft; struct ftnode sn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -327,7 +342,7 @@ test_prefetching(void) { uint64_t key1 = 100; uint64_t key2 = 200; - + MALLOC_N(sn.n_children, sn.bp); DBT pivotkeys[2]; toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); @@ -336,13 +351,13 @@ test_prefetching(void) { BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; - 
BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; - BP_STATE(&sn,2) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; + BP_STATE(&sn, 2) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -352,7 +367,7 @@ test_prefetching(void) { CKERR(r); // data in the buffers does not matter in this test - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -363,41 +378,48 @@ test_prefetching(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = 
toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); - test_prefetch_read(fd, ft, ft_h); + test_prefetch_read(fd, ft, ft_h); test_subset_read(fd, ft, ft_h); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -405,11 +427,12 @@ test_prefetching(void) { toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { test_prefetching(); return 0; diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc index ceef3772e2a..26a3dae673c 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc @@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/cursor.h" -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; #ifndef MIN #define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) #endif -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keylen, + const char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char memcpy(r->u.clean.val, val, vallen); } - -static void -le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val) -{ +static void le_malloc(bn_data *bn, + uint32_t idx, + const char *key, + const char *val) { int keylen = strlen(key) + 1; int vallen = strlen(val) + 1; le_add_to_bn(bn, idx, key, keylen, val, vallen); } - -static void -test1(int fd, FT ft_h, FTNODE *dn) { +static void test1(int fd, FT ft_h, FTNODE *dn) { int r; ftnode_fetch_extra bfe_all; bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); - assert(r==0); + invariant(r == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and NOT get rid of anything PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); 
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); - } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } } PAIR_ATTR size; bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); - } - } + } req = toku_ftnode_pf_req_callback(*dn, &bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } (*dn)->dirty = 1; toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); @@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) { toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < 
(*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } toku_free(ndd); toku_ftnode_free(dn); } - -static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) { +static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) { return 0; } -static void -test2(int fd, FT ft_h, FTNODE *dn) { +static void test2(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); ft_search search; - + ftnode_fetch_extra bfe_subset; bfe_subset.create_for_subset_read( ft_h, - ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ft_search_init( + &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, true, false, - false - ); + false); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset); + invariant(r == 0); bool is_leaf = ((*dn)->height == 0); - // at this point, although both partitions are available, only the + // at this point, although both partitions are available, only the // second basement node should have had its clock // touched - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 0) == (is_leaf) ? 
PT_ON_DISK : PT_COMPRESSED); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 1)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr); - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); toku_free(ndd); toku_ftnode_free(dn); } -static void -test3_leaf(int fd, FT ft_h, FTNODE *dn) { +static void test3_leaf(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - + ftnode_fetch_extra bfe_min; bfe_min.create_for_min_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min); + invariant(r == 0); // // make sure we have a leaf // - assert((*dn)->height == 0); + invariant((*dn)->height == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } toku_ftnode_free(dn); toku_free(ndd); } -static void -test_serialize_nonleaf(void) { +static void test_serialize_nonleaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int 
fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -265,11 +253,11 @@ test_serialize_nonleaf(void) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -281,11 +269,38 @@ test_serialize_nonleaf(void) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -297,35 +312,41 @@ test_serialize_nonleaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = 
ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); test2(fd, ft_h, &dn); @@ -333,22 +354,26 @@ test_serialize_nonleaf(void) { toku_destroy_ftnode_internals(&sn); toku_free(ndd); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf(void) { +static void test_serialize_leaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -364,8 +389,8 @@ 
test_serialize_leaf(void) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval"); @@ -378,51 +403,59 @@ test_serialize_leaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); - test3_leaf(fd, ft_h,&dn); + test3_leaf(fd, ft_h, &dn); toku_destroy_ftnode_internals(&sn); - 
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(); test_serialize_leaf(); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc index 9828f49513c..d50488ae197 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc @@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include #include "test.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) #endif const double USECS_PER_SEC = 1000000.0; -static void -le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + char *key, + int keylen, + char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va memcpy(r->u.clean.val, val, vallen); } -static int -long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { const long *CAST_FROM_VOIDP(x, a->data); const long *CAST_FROM_VOIDP(y, b->data); return (*x > *y) - (*x < *y); } -static void -test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_leaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode *sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de MALLOC_N(sn->n_children, sn->bp); sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { - BP_STATE(sn,i) = PT_AVAIL; + BP_STATE(sn, i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); } int nperbn = nelts / sn->n_children; @@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de k = ck * nperbn + i; char buf[valsize]; int c; - for (c = 0; c < valsize * 
entropy; ) { - int *p = (int *) &buf[c]; + for (c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); le_add_to_bn( - BLB_DATA(sn,ck), - i, - (char *)&k, - sizeof k, - buf, - sizeof buf - ); + BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf); } if (ck < 7) { DBT pivotkey; - sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), + ck); } } @@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval total_start; @@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de gettimeofday(&t[0], NULL); ndd = NULL; sn->dirty = 1; - r = 
toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; total_start.tv_usec += t[0].tv_usec; @@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_free(ndd); } double dt; - dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= ser_runs; - printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); + printf( + "serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); - //reset + // reset total_start.tv_sec = total_start.tv_usec = 0; total_end.tv_sec = total_end.tv_usec = 0; @@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; @@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_ftnode_free(&dn); toku_free(ndd2); } - dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= deser_runs; - printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize 
time(ms) %lf (average of %d runs)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(average of %d runs)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_nonleaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_BLOCKNUM(&sn, i).b = 30 + (i*5); - BP_STATE(&sn,i) = PT_AVAIL; + BP_BLOCKNUM(&sn, i).b = 30 + (i * 5); + BP_STATE(&sn, i) = PT_AVAIL; set_BNC(&sn, i, toku_create_empty_nl()); } - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); @@ -276,14 +289,23 @@ 
test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int k = ck * nperchild + i; char buf[valsize]; int c; - for (c = 0; c < valsize * entropy; ) { - int *p = (int *) &buf[c]; + for (c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp); + toku_bnc_insert_msg(bnc, + &k, + sizeof k, + buf, + valsize, + FT_NONE, + next_dummymsn(), + xids_123, + true, + cmp); } if (ck < 7) { DBT pivotkey; @@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int } } - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); cmp.destroy(); @@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + 
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval t[2]; gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); double dt; - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); + printf( + "serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(IGNORED RUNS=%d)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + 
tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); @@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_free(ndd); toku_free(ndd2); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { const int DEFAULT_RUNS = 5; long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS; double entropy = 0.3; if (argc != 3 && argc != 5) { - fprintf(stderr, "Usage: %s [ ]\n", argv[0]); + fprintf(stderr, + "Usage: %s [ " + "]\n", + argv[0]); fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS); return 2; } diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc index 332aaa0c170..0cddaf19651 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc @@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" #include "bndata.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) #endif -static size_t -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) -{ +static size_t le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keysize, + const char *val, + int valsize) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keysize, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha } class test_key_le_pair { - public: + public: uint32_t keylen; - char* keyp; + char *keyp; LEAFENTRY le; test_key_le_pair() : keylen(), keyp(), le() {} void init(const char *_keyp, const char *_val) { init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1); } - void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) { + void init(const char *_keyp, + uint32_t _keylen, + const char *_val, + uint32_t _vallen) { keylen = _keylen; CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen))); @@ -95,126 +92,144 @@ class test_key_le_pair { } }; -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { +static void setup_dn(enum ftnode_verify_type bft, + int fd, + FT ft_h, + FTNODE *dn, + FTNODE_DISK_DATA *ndd) { int r; if (bft == read_all) { ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero 
for hash*/, dn, ndd, &bfe); - assert(r==0); - } - else if (bft == read_compressed || bft == read_none) { + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + } else if (bft == read_compressed || bft == read_none) { ftnode_fetch_extra bfe; bfe.create_for_min_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); - assert(r==0); - // assert all bp's are compressed or on disk. + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + // invariant all bp's are compressed or on disk. for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED || + BP_STATE(*dn, i) == PT_ON_DISK); } // if read_none, get rid of the compressed bp's if (bft == read_none) { if ((*dn)->height == 0) { - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - // assert all bp's are on disk + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); + // invariant all bp's are on disk for (int i = 0; i < (*dn)->n_children; i++) { if ((*dn)->height == 0) { - assert(BP_STATE(*dn,i) == PT_ON_DISK); - assert(is_BNULL(*dn, i)); - } - else { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(is_BNULL(*dn, i)); + } else { + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } - } - else { + } else { // first decompress everything, and make sure // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available 
+ invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - // assert all bp's are still available, because we touched the clock - assert(BP_STATE(*dn,i) == PT_AVAIL); - // now assert all should be evicted - assert(BP_SHOULD_EVICT(*dn, i)); + // invariant all bp's are still available, because we touched + // the clock + invariant(BP_STATE(*dn, i) == PT_AVAIL); + // now invariant all should be evicted + invariant(BP_SHOULD_EVICT(*dn, i)); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } } // now decompress them bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available + invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // continue on with test - } - else { + } else { // if we get here, this is a test bug, NOT a bug in development code - assert(false); + invariant(false); } } -static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) { +static void write_sn_to_disk(int fd, + FT_HANDLE ft, + FTNODE sn, + FTNODE_DISK_DATA 
*src_ndd, + bool do_clone) { int r; if (do_clone) { - void* cloned_node_v = NULL; + void *cloned_node_v = NULL; PAIR_ATTR attr; long clone_size; - toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); + toku_ftnode_clone_callback( + sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v); - r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); + invariant(r == 0); toku_ftnode_free(&cloned_node); - } - else { - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); - assert(r==0); + } else { + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); + invariant(r == 0); } } -static void -test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft, + bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; -#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 }) -#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 }) +#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42}) +#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84}) sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; @@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + 
BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5); le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 }); + BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73}); BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK; FT_HANDLE XMALLOC(ft); @@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } - //Want to use block #20 + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - 
assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>=1); - assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children >= 1); + invariant(dn->max_msn_applied_to_node_on_disk.msn == + POSTSERIALIZE_MSN_ON_DISK.msn); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair elts[3]; elts[0].init("a", "aval"); @@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn == + POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - 
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const int keylens = 256*1024, vallens = 0; + const int keylens = 256 * 1024, vallens = 0; const uint32_t nrows = 8; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone MALLOC_N(sn.n_children, 
sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { // one basement per row char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; char c = 'a' + i; - memset(key, c, keylens-1); - le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); - if (i < nrows-1) { + memset(key, c, keylens - 1); + le_add_to_bn(BLB_DATA(&sn, i), + 0, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); + if (i < nrows - 1) { uint32_t keylen; - void* curr_key; + void *curr_key; BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); DBT pivotkey; - sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), + i); } } @@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == 
(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - - assert(dn->blocknum.b==20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->blocknum.b == 20); + + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, keylens-1); - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + memset(key, c, keylens - 1); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; 
uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const uint32_t nrows = 196*1024; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const uint32_t nrows = 196 * 1024; + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ 
-487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { XMALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; - set_BLB(&sn, i, toku_create_empty_bn()); + BP_STATE(&sn, i) = PT_AVAIL; + set_BLB(&sn, i, toku_create_empty_bn()); } size_t total_size = 0; for (uint32_t i = 0; i < nrows; ++i) { uint32_t key = i; uint32_t val = i; - total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + total_size += le_add_to_bn(BLB_DATA(&sn, 0), + i, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); } FT_HANDLE XMALLOC(ft); @@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -535,56 +602,66 
@@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { int key = 0, val = 0; for (uint32_t i = 0; i < nrows; ++i, key++, val++) { - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); - void* tmp = les[last_i].keyp; + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, 
&curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + uint32_t *CAST_FROM_VOIDP(pivot, + dn->pivotkeys.get_pivot(bn).data); + void *tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); - assert(*pivot >= *item); + invariant(*pivot >= *item); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert - assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change + invariant(BLB_DATA(dn, bn)->get_disk_size() < + 128 * 1024); // BN_MAX_SIZE, apt to change } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; const uint32_t nrows = 7; const size_t key_size = 8; - const size_t val_size = 512*1024; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const size_t val_size = 512 * 1024; + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | 
S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.n_children = 1; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - + MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); - le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); + le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size); } FT_HANDLE XMALLOC(ft); @@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset 
== (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); les[i].init(key, key_size, val, val_size); } } const uint32_t npartitions = dn->n_children; - assert(npartitions == nrows); + invariant(npartitions == nrows); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY 
curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + (char *)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert } - assert(last_i == 7); + invariant(last_i == 7); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -760,7 +857,7 @@ 
test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_fill_dbt(&pivotkeys[5], "x", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); BLB_SEQINSERT(&sn, i) = 0; } @@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - 
assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>0); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children > 0); { test_key_le_pair elts[3]; - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. elts[0].init("a", "aval"); elts[1].init("b", "bval"); @@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char 
*)dn->pivotkeys.get_pivot(bn).data, + (char *)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_multiple_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_fill_dbt(&pivotkeys[2], "A", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } @@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block 
#20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children == 1); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; for (uint32_t i = 0; i < npartitions; ++i) { - assert(dest_ndd[i].start > 0); - assert(dest_ndd[i].size > 0); + invariant(dest_ndd[i].start > 0); + invariant(dest_ndd[i].size > 0); if (i > 0) { - assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); + invariant(dest_ndd[i].start >= + dest_ndd[i - 1].start + dest_ndd[i - 1].size); } - assert(BLB_DATA(dn, i)->num_klpairs() == 0); + invariant(BLB_DATA(dn, i)->num_klpairs() 
== 0); } } - + toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + 
toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + 
invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 1); - assert(dn->n_children==2); - assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); - assert(dn->pivotkeys.get_pivot(0).size==6); - assert(BP_BLOCKNUM(dn,0).b==30); - assert(BP_BLOCKNUM(dn,1).b==35); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 1); + invariant(dn->n_children == 2); + invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0); + invariant(dn->pivotkeys.get_pivot(0).size == 6); + invariant(BP_BLOCKNUM(dn, 0).b == 30); + invariant(BP_BLOCKNUM(dn, 1).b == 35); message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(src_msg_buffer1->equals(dest_msg_buffer1)); - assert(src_msg_buffer2->equals(dest_msg_buffer2)); + invariant(src_msg_buffer1->equals(dest_msg_buffer1)); + invariant(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int 
test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(read_none, false); @@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + true); test_serialize_leaf_with_empty_basement_nodes(read_none, false); test_serialize_leaf_with_empty_basement_nodes(read_all, false); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc index 598a1cc7085..706bd94fbc3 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc @@ -164,17 +164,16 @@ static void test_read_what_was_written (void) { int r; const int NVALS=10000; - if (verbose) printf("test_read_what_was_written(): "); fflush(stdout); + if (verbose) { + printf("test_read_what_was_written(): "); fflush(stdout); + } unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); - toku_cachetable_close(&ct); - - + toku_cachetable_close(&ct); /* Now see if we can read an empty tree in. 
*/ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); @@ -189,8 +188,6 @@ static void test_read_what_was_written (void) { r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); - - /* Now see if we can read it in and get the value. */ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc index 53973794eae..aeb5a897c48 100644 --- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc @@ -109,7 +109,9 @@ static int run_test(void) r = pqueue_pop(pq, &node); assert(r==0); if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data)); if ( *(int*)(node->key->data) != i ) { - if (verbose) printf("FAIL\n"); return -1; + if (verbose) + printf("FAIL\n"); + return -1; } } pqueue_free(pq); diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc index a78f787cdf2..f2004964862 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc @@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) { do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200); invariant(do_garbage_collect); - // It is definately worth doing when the above case is true + // It is definitely worth doing when the above case is true // and there is more than one provisional entry. 
ule.num_cuxrs = 1; ule.num_puxrs = 2; diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc index 419af550545..71357a1e16a 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc @@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; -static void test_oldest_referenced_xid_gets_propogated(void) { +static void test_oldest_referenced_xid_gets_propagated(void) { int r; CACHETABLE ct; FT_HANDLE t; @@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) { toku_ft_flush_some_child(t->ft, node, &fa); // pin the child, verify that oldest referenced xid was - // propogated from parent to child during the flush + // propagated from parent to child during the flush toku_pin_ftnode( t->ft, child_nonleaf_blocknum, @@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) { int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { default_parse_args(argc, argv); - test_oldest_referenced_xid_gets_propogated(); + test_oldest_referenced_xid_gets_propagated(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc similarity index 55% rename from storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h rename to storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc index 8aded3898c1..ea4f9374dc3 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc @@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." -#pragma once +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include +#include +#include +#include -#include +static void test_insert_remove(void) { + uint64_t i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; -#include "ft/serialize/block_allocator.h" + tree->Insert({0, 100}); -// Block allocation strategy implementations + for (i = 0; i < 10; i++) { + tree->Remove(3); + tree->Remove(2); + } + tree->ValidateBalance(); + tree->ValidateMhs(); -class block_allocator_strategy { -public: - static struct block_allocator::blockpair * - first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); + for (i = 0; i < 10; i++) { + tree->Insert({5 * i, 3}); + } + tree->ValidateBalance(); + tree->ValidateMhs(); - static struct block_allocator::blockpair * - best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); + uint64_t offset = tree->Remove(2); + invariant(offset == 0); + offset = tree->Remove(10); + invariant(offset == 50); + offset = tree->Remove(3); + invariant(offset == 5); + tree->ValidateBalance(); + tree->ValidateMhs(); - static struct block_allocator::blockpair * - padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); + tree->Insert({48, 2}); + tree->Insert({50, 10}); - static struct block_allocator::blockpair * - heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat); -}; + tree->ValidateBalance(); + tree->ValidateMhs(); + + tree->Insert({3, 7}); + offset = tree->Remove(10); + invariant(offset == 2); + tree->ValidateBalance(); + tree->ValidateMhs(); + tree->Dump(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if 
(verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc new file mode 100644 index 00000000000..85f29ce9813 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc @@ -0,0 +1,102 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
+ +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include +#include +#include +#include + +#define N 1000000 +std::vector input_vector; +MhsRbTree::Node::BlockPair old_vector[N]; + +static int myrandom(int i) { return std::rand() % i; } + +static void generate_random_input() { + std::srand(unsigned(std::time(0))); + + // set some values: + for (uint64_t i = 1; i < N; ++i) { + input_vector.push_back({i, 0}); + old_vector[i] = {i, 0}; + } + // using built-in random generator: + std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom); +} + +static void test_insert_remove(void) { + int i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; + generate_random_input(); + if (verbose) { + printf("\n we are going to insert the following block offsets\n"); + for (i = 0; i < N; i++) + printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt()); + } + for (i = 0; i < N; i++) { + tree->Insert(input_vector[i]); + // tree->ValidateBalance(); + } + tree->ValidateBalance(); + MhsRbTree::Node::BlockPair *p_bps = &old_vector[0]; + tree->ValidateInOrder(p_bps); + printf("min node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MinNode()).ToInt()); + printf("max node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MaxNode()).ToInt()); + + for (i = 0; i < N; i++) { + // tree->ValidateBalance(); + tree->RawRemove(input_vector[i]._offset.ToInt()); + } + + tree->Destroy(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if (verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 407116b983c..90eee1e580a 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
// functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h // -// this (poorly) explains the absense of "roll.h" +// this (poorly) explains the absence of "roll.h" // these flags control whether or not we send commit messages for // various operations diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc index df830afd0df..c9464c3ed60 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc @@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { txn->roll_info.spilled_rollback_head = ROLLBACK_NONE; txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE; } - // if we're commiting a child rollback, put its entries into the parent + // if we're committing a child rollback, put its entries into the parent // by pinning both child and parent and then linking the child log entry // list to the end of the parent log entry list. 
if (txn_has_current_rollback_log(txn)) { diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc index 68c94c2ad11..08d7c8874e5 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc @@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) { // flush an ununused log to disk, by allocating a size 0 blocknum in // the blocktable -static void -toku_rollback_flush_unused_log( - ROLLBACK_LOG_NODE log, - BLOCKNUM logname, - int fd, - FT ft, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ +static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log, + BLOCKNUM logname, + int fd, + FT ft, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { if (write_me) { DISKOFF offset; - ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); + ft->blocktable.realloc_on_disk( + logname, 0, &offset, ft, fd, for_checkpoint); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc index ac393fbf179..e3dce6d27dd 100644 --- a/storage/tokudb/PerconaFT/ft/ule.cc +++ b/storage/tokudb/PerconaFT/ft/ule.cc @@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection( // by new txns. // 2.) There is only one committed entry, but the outermost // provisional entry is older than the oldest known referenced -// xid, so it must have commited. Therefor we can promote it to -// committed and get rid of the old commited entry. +// xid, so it must have committed. Therefor we can promote it to +// committed and get rid of the old committed entry. 
if (le->type != LE_MVCC) { return false; } diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc index 880f9a3a9bb..dbbea974a49 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc @@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) { if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata); // check the data size -#if __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) assert(maxdata > (1ULL << 32)); #elif __i386__ assert(maxdata < (1ULL << 32)); diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in index e1412cc9e14..1a34bf1ef45 100644 --- a/storage/tokudb/PerconaFT/portability/toku_config.h.in +++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in @@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #cmakedefine TOKU_DEBUG_PARANOID 1 #cmakedefine USE_VALGRIND 1 - #cmakedefine HAVE_ALLOCA_H 1 #cmakedefine HAVE_ARPA_INET_H 1 #cmakedefine HAVE_BYTESWAP_H 1 diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h index 11a3f3aa2b9..a1278ef0337 100644 --- a/storage/tokudb/PerconaFT/portability/toku_time.h +++ b/storage/tokudb/PerconaFT/portability/toku_time.h @@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default") // Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC. 
static inline tokutime_t toku_time_now(void) { +#if defined(__x86_64__) || defined(__i386__) uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; +#elif defined (__aarch64__) + uint64_t result; + __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result)); + return result; +#else +#error No timer implementation for this platform +#endif } static inline uint64_t toku_current_time_microsec(void) { diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h index 48e62ee49b2..fdaa561e3d0 100644 --- a/storage/tokudb/PerconaFT/src/indexer-internal.h +++ b/storage/tokudb/PerconaFT/src/indexer-internal.h @@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. -// the array is a resizeable array with max size "max_keys" and current size "current_keys". +// the array is a resizable array with max size "max_keys" and current size "current_keys". // the ordered set is used by the hotindex undo function to collect the commit keys. 
struct indexer_commit_keys { int max_keys; // max number of keys diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc index 8d0b080b9fe..4c7f5336161 100644 --- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc +++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc @@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u } // inject "delete" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) { int result = 0; @@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi } // inject "insert" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) { int result = 0; diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test index 20df13923e6..7cce68e6ff8 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test @@ -1,3 +1,3 @@ -# commited insert +# committed insert key k1 insert committed 0 v100 diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc index 3f2f8d7455a..aaf77c503cc 100644 --- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc +++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc @@ -51,7 +51,7 @@ int DISALLOW_PUTS=0; int COMPRESS=0; enum {MAGIC=311}; -bool dup_row_at_end = false; // false: duplicate at the 
begining. true: duplicate at the end. The duplicated row is row 0. +bool dup_row_at_end = false; // false: duplicate at the beginning. true: duplicate at the end. The duplicated row is row 0. int dup_row_id = 0; // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning. Otherwise insert the row specified here. // diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc index a4dc0ea9236..2c905c5ff12 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc @@ -156,7 +156,7 @@ do_args(int argc, char * const argv[]) { choices[i] = -1; } - char c; + int c; while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) { switch(c) { case 'v': diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc index a2b48e443cd..48843a0bd32 100644 --- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc +++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc @@ -166,7 +166,7 @@ run_test (void) { DB_BTREE_STAT64 s; r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); r = db->close(db, 0); CKERR(r); @@ -176,7 +176,7 @@ run_test (void) { r = txn->commit(txn, 0); CKERR(r); r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); } // verify update callback overwrites the row diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc index 8e5109cd2a9..f6111d4b67c 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc @@ -78,7 +78,7 @@ static void test_insert_many_gc(void) { // from having an MVCC stack of size 'N'. 
At the time of this // writing, we run full GC on leaf-inject when the leaf is // 32mb or larger. A good invariant is that the max LE size - // never grew larger than 35mb and that the max commited xr stack + // never grew larger than 35mb and that the max committed xr stack // length never exceeded 35 const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE"); const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR"); diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc index aaafe284906..88140dd1731 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc @@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // This test is a micro stress test that does multithreaded updates on a fixed size table. // There is also a thread that scans the table with bulk fetch, ensuring the sum is zero. // -// This test is targetted at stressing the locktree, hence the small table and many update threads. +// This test is targeted at stressing the locktree, hence the small table and many update threads. 
// static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) { diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc index fec454b8009..301eed1560e 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc @@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) { continue; } } - if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n"); + if (verbose>0) printf("%s", __FILE__); + if (verbose>1) printf("\n"); for (i=1; i<100; i++) test_txn_abort(i); if (verbose>1) printf("%s OK\n", __FILE__); diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h index 462a2a3d861..2d6c84126e1 100644 --- a/storage/tokudb/PerconaFT/src/ydb-internal.h +++ b/storage/tokudb/PerconaFT/src/ydb-internal.h @@ -114,7 +114,7 @@ struct __toku_db_env_internal { char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /) - char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /) + char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /) fs_redzone_state fs_state; uint64_t fs_seq; // how many times has fs_poller run? diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess index da833146088..7501b1bee01 100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess @@ -1,10 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. 
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 -# Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2009-04-27' +timestamp='2016-06-22' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -17,9 +17,7 @@ timestamp='2009-04-27' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,16 +25,16 @@ timestamp='2009-04-27' # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -56,8 +54,9 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward @@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? 
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in fi ;; *) - os=netbsd + os=netbsd ;; esac # The OS release @@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on @@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit ;; + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead @@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo s390-ibm-zvmoe exit ;; *:OS400:*:*) - echo powerpc-ibm-os400 + echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} @@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build SUN_ARCH="i386" @@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; @@ -477,8 +482,8 @@ EOF echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ @@ -491,7 +496,7 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; @@ -548,7 +553,7 @@ EOF echo rs6000-ibm-aix3.2 fi exit ;; - *:AIX:*:[456]) + *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -591,52 +596,52 @@ EOF 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # 
CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac + esac ;; + esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + sed 's/^ //' << EOF >$dummy.c - #define _HPUX_SOURCE - #include - #include + #define _HPUX_SOURCE + #include + #include - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z 
"$HP_ARCH" && HP_ARCH=hppa @@ -656,7 +661,7 @@ EOF # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep __LP64__ >/dev/null + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -727,22 +732,22 @@ EOF exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; @@ -766,14 +771,14 @@ EOF exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo 
"sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} @@ -785,13 +790,12 @@ EOF echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) - case ${UNAME_MACHINE} in - pc98) - echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; i*:CYGWIN*:*) @@ -800,19 +804,22 @@ EOF *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; - *:Interix*:[3456]*) - case ${UNAME_MACHINE} in + *:Interix*:*) + case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; - EM64T | authenticamd | genuineintel) + authenticamd | genuineintel | EM64T) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; IA64) @@ -822,6 +829,9 @@ EOF [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. 
Should we @@ -851,92 +861,13 @@ EOF i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; - arm*:Linux:*:*) - eval $set_cc_for_build - if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_EABI__ - then - echo ${UNAME_MACHINE}-unknown-linux-gnu - else - echo ${UNAME_MACHINE}-unknown-linux-gnueabi - fi - exit ;; - avr32*:Linux:*:*) + aarch64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit ;; - crisv32:Linux:*:*) - echo crisv32-axis-linux-gnu - exit ;; - frv:Linux:*:*) - echo frv-unknown-linux-gnu - exit ;; - ia64:Linux:*:*) + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - or32:Linux:*:*) - echo or32-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo 
powerpc-unknown-linux-gnu - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; @@ -946,14 +877,90 @@ EOF EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) 
|| defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; padre:Linux:*:*) echo sparc-unknown-linux-gnu exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -962,14 +969,17 @@ EOF *) echo hppa-unknown-linux-gnu ;; esac exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu @@ -977,75 +987,18 @@ EOF sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. 
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^LIBC/{ - s: ::g - p - }'`" - test x"${LIBC}" != x && { - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit - } - test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } - ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both @@ -1053,11 +1006,11 @@ EOF echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. + # Use sysv4.2uw... so that sysv4* matches it. 
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) @@ -1074,7 +1027,7 @@ EOF i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) @@ -1089,7 +1042,7 @@ EOF fi exit ;; i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. + # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; @@ -1117,13 +1070,13 @@ EOF exit ;; pc:*:*:*) # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. 
echo i586-pc-msdosdjgpp - exit ;; + exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; @@ -1158,8 +1111,8 @@ EOF /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ @@ -1182,7 +1135,7 @@ EOF rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) @@ -1202,10 +1155,10 @@ EOF echo ns32k-sni-sysv fi exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm @@ -1231,11 +1184,11 @@ EOF exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv${UNAME_RELEASE} else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv${UNAME_RELEASE} fi - exit ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
echo powerpc-be-beos exit ;; @@ -1275,6 +1228,16 @@ EOF *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} @@ -1290,6 +1253,9 @@ EOF *:QNX:*:4*) echo i386-pc-qnx exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; NSE-?:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; @@ -1335,13 +1301,13 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` + UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; @@ -1359,6 +1325,9 @@ EOF i*86:AROS:*:*) echo ${UNAME_MACHINE}-pc-aros exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1381,11 +1350,11 @@ main () #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 - "4" + "4" #else - "" + "" #endif - ); exit (0); + ); exit (0); #endif #endif diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt index d3808483fea..30ca883c4ba 100644 --- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt @@ -1,6 +1,6 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify 
ba_replay) +set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify) foreach(tool ${tools}) add_executable(${tool} ${tool}.cc) add_dependencies(${tool} install_tdb_h) @@ -14,4 +14,3 @@ target_link_libraries(ftverify m) install(TARGETS tokuftdump DESTINATION bin COMPONENT tokukv_tools) install(TARGETS tokuft_logprint DESTINATION bin COMPONENT tokukv_tools) - diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc deleted file mode 100644 index cade7e5dfaf..00000000000 --- a/storage/tokudb/PerconaFT/tools/ba_replay.cc +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. 
If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -// Replay a block allocator trace against different strategies and compare -// the results - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "ft/serialize/block_allocator.h" - -using std::map; -using std::set; -using std::string; -using std::vector; - -static int verbose = false; - -static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { - if (!pred) { - fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); - abort(); - } -} - -static char *trim_whitespace(char *line) { - // skip leading whitespace - while (isspace(*line)) { - line++; - } - return line; -} - -static int64_t parse_number(char **ptr, int line_num, int base) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - char *new_ptr; - int64_t n = strtoll(line, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); - ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); - *ptr = new_ptr; - return n; -} - -static uint64_t parse_uint64(char **ptr, int line_num) { - int64_t n = parse_number(ptr, line_num, 10); - // we happen to know that the uint64's we deal with will - // take less than 63 bits (they come from pointers) - return static_cast(n); -} - -static string parse_token(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - // parse the first token, which represents the traced function - char token[64]; - int r = sscanf(*ptr, "%64s", token); - ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); - *ptr += strlen(token); - return string(token); -} - -static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - uint64_t offset, 
size; - int bytes_read; - int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); - ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); - *ptr += bytes_read; - return block_allocator::blockpair(offset, size); -} - -static char *strip_newline(char *line, bool *found) { - char *ptr = strchr(line, '\n'); - if (ptr != nullptr) { - if (found != nullptr) { - *found = true; - } - *ptr = '\0'; - } - return line; -} - -static char *read_trace_line(FILE *file) { - const int buf_size = 4096; - char buf[buf_size]; - std::stringstream ss; - while (true) { - if (fgets(buf, buf_size, file) == nullptr) { - break; - } - bool has_newline = false; - ss << strip_newline(buf, &has_newline); - if (has_newline) { - // end of the line, we're done out - break; - } - } - std::string s = ss.str(); - return s.size() ? toku_strdup(s.c_str()) : nullptr; -} - -static vector canonicalize_trace_from(FILE *file) { - // new trace, canonicalized from a raw trace - vector canonicalized_trace; - - // raw allocator id -> canonical allocator id - // - // keeps track of allocators that were created as part of the trace, - // and therefore will be part of the canonicalized trace. 
- uint64_t allocator_id_seq_num = 0; - map allocator_ids; - - // allocated offset -> allocation seq num - // - uint64_t allocation_seq_num = 0; - static const uint64_t ASN_NONE = (uint64_t) -1; - typedef map offset_seq_map; - - // raw allocator id -> offset_seq_map that tracks its allocations - map offset_to_seq_num_maps; - - int line_num = 0; - char *line; - while ((line = read_trace_line(file)) != nullptr) { - line_num++; - char *ptr = line; - - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 16); - - std::stringstream ss; - if (fn.find("ba_trace_create") != string::npos) { - ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); - ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", - "corrupted trace: bad fn", line, line_num); - - // we only convert the allocator_id to an allocator_id_seq_num - // in the canonical trace and leave the rest of the line as-is. - allocator_ids[allocator_id] = allocator_id_seq_num; - ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; - allocator_id_seq_num++; - - // First, read passed the reserve / alignment values. - (void) parse_uint64(&ptr, line_num); - (void) parse_uint64(&ptr, line_num); - if (fn == "ba_trace_create_from_blockpairs") { - // For each blockpair created by this traceline, add its offset to the offset seq map - // with asn ASN_NONE so that later canonicalizations of `free' know whether to write - // down the asn or the raw offset. 
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - (*map)[bp.offset] = ASN_NONE; - } - } - } else { - ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); - uint64_t canonical_allocator_id = allocator_ids[allocator_id]; - - // this is the map that tracks allocations for this allocator - offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - - if (fn == "ba_trace_alloc") { - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); - - // remember that an allocation at `offset' has the current alloc seq num - (*map)[offset] = allocation_seq_num; - - // translate `offset = alloc(size)' to `asn = alloc(size)' - ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; - allocation_seq_num++; - } else if (fn == "ba_trace_free") { - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); - - // get the alloc seq num for an allcation that occurred at `offset' - const uint64_t asn = (*map)[offset]; - map->erase(offset); - - // if there's an asn, then a corresponding ba_trace_alloc occurred and we should - // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs - // and we write the original offset. 
- if (asn != ASN_NONE) { - ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; - } else { - ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; - } - } else if (fn == "ba_trace_destroy") { - // Remove this allocator from both maps - allocator_ids.erase(allocator_id); - offset_to_seq_num_maps.erase(allocator_id); - - // translate `destroy(ptr_id) to destroy(canonical_id)' - ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; - } else { - ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); - } - } - canonicalized_trace.push_back(ss.str()); - - toku_free(line); - } - - if (allocator_ids.size() != 0) { - fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running"); - } - - return canonicalized_trace; -} - -struct streaming_variance_calculator { - int64_t n_samples; - int64_t mean; - int64_t variance; - - // math credit: AoCP, Donald Knuth, '62 - void add_sample(int64_t x) { - n_samples++; - if (n_samples == 1) { - mean = x; - variance = 0; - } else { - int64_t old_mean = mean; - mean = old_mean + ((x - old_mean) / n_samples); - variance = (((n_samples - 1) * variance) + - ((x - old_mean) * (x - mean))) / n_samples; - } - } -}; - -struct canonical_trace_stats { - uint64_t n_lines_replayed; - - uint64_t n_create; - uint64_t n_create_from_blockpairs; - uint64_t n_alloc_hot; - uint64_t n_alloc_cold; - uint64_t n_free; - uint64_t n_destroy; - - struct streaming_variance_calculator alloc_hot_bytes; - struct streaming_variance_calculator alloc_cold_bytes; - - canonical_trace_stats() { - memset(this, 0, sizeof(*this)); - } -}; - -struct fragmentation_report { - TOKU_DB_FRAGMENTATION_S beginning; - TOKU_DB_FRAGMENTATION_S end; - fragmentation_report() { - memset(this, 0, sizeof(*this)); - } - void merge(const struct fragmentation_report &src_report) { - for (int i = 0; i < 2; i++) { - TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? 
&beginning : &end; - const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end; - dst->file_size_bytes += src->file_size_bytes; - dst->data_bytes += src->data_bytes; - dst->data_blocks += src->data_blocks; - dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; - dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; - dst->unused_bytes += src->unused_bytes; - dst->unused_blocks += src->unused_blocks; - dst->largest_unused_block += src->largest_unused_block; - } - } -}; - -static void replay_canonicalized_trace(const vector &canonicalized_trace, - block_allocator::allocation_strategy strategy, - map *reports, - struct canonical_trace_stats *stats) { - // maps an allocator id to its block allocator - map allocator_map; - - // maps allocation seq num to allocated offset - map seq_num_to_offset; - - for (vector::const_iterator it = canonicalized_trace.begin(); - it != canonicalized_trace.end(); it++) { - const int line_num = stats->n_lines_replayed++; - - char *line = toku_strdup(it->c_str()); - line = strip_newline(line, nullptr); - - char *ptr = trim_whitespace(line); - - // canonical allocator id is in base 10, not 16 - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 10); - - if (fn.find("ba_trace_create") != string::npos) { - const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); - const uint64_t alignment = parse_uint64(&ptr, line_num); - ba_replay_assert(allocator_map.count(allocator_id) == 0, - "corrupted canonical trace: double create", line, line_num); - - block_allocator *ba = new block_allocator(); - if (fn == "ba_trace_create") { - ba->create(reserve_at_beginning, alignment); - stats->n_create++; - } else { - ba_replay_assert(fn == "ba_trace_create_from_blockpairs", - "corrupted canonical trace: bad create fn", line, line_num); - vector pairs; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = 
parse_blockpair(&ptr, line_num); - pairs.push_back(bp); - } - ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); - stats->n_create_from_blockpairs++; - } - ba->set_strategy(strategy); - - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - (*reports)[allocator_id].beginning = report; - allocator_map[allocator_id] = ba; - } else { - ba_replay_assert(allocator_map.count(allocator_id) > 0, - "corrupted canonical trace: no such allocator", line, line_num); - - block_allocator *ba = allocator_map[allocator_id]; - if (fn == "ba_trace_alloc") { - // replay an `alloc' whose result will be associated with a certain asn - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 0, - "corrupted canonical trace: double alloc (asn in use)", line, line_num); - - uint64_t offset; - ba->alloc_block(size, heat, &offset); - seq_num_to_offset[asn] = offset; - heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; - heat ? 
stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); - } else if (fn == "ba_trace_free_asn") { - // replay a `free' on a block whose offset is the result of an alloc with an asn - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 1, - "corrupted canonical trace: double free (asn unused)", line, line_num); - - const uint64_t offset = seq_num_to_offset[asn]; - ba->free_block(offset); - seq_num_to_offset.erase(asn); - stats->n_free++; - } else if (fn == "ba_trace_free_offset") { - // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs - const uint64_t offset = parse_uint64(&ptr, line_num); - ba->free_block(offset); - stats->n_free++; - } else if (fn == "ba_trace_destroy") { - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - ba->destroy(); - (*reports)[allocator_id].end = report; - allocator_map.erase(allocator_id); - stats->n_destroy++; - } else { - ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); - } - } - - toku_free(line); - } -} - -static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { - switch (strategy) { - case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: - return "first-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: - return "best-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: - return "heat-zone"; - case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: - return "padded-fit"; - default: - abort(); - } -} - -static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { - if (strcmp(str, "first-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; - } - if (strcmp(str, "best-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; - } - if (strcmp(str, "heat-zone") == 0) { - return 
block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; - } - if (strcmp(str, "padded-fit") != 0) { - fprintf(stderr, "bad strategy string: %s\n", str); - abort(); - } - return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; -} - -static void print_result_verbose(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - if (report.end.data_bytes + report.end.unused_bytes + - report.beginning.data_bytes + report.beginning.unused_bytes - < 32UL * 1024 * 1024) { - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - return; - } - - printf(" allocator_id: %20" PRId64 "\n", allocator_id); - printf(" strategy: %20s\n", strategy_to_cstring(strategy)); - - for (int i = 0; i < 2; i++) { - const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; - printf("%s\n", i == 0 ? "BEFORE" : "AFTER"); - - uint64_t total_bytes = r->data_bytes + r->unused_bytes; - uint64_t total_blocks = r->data_blocks + r->unused_blocks; - - // byte statistics - printf(" total bytes: %20" PRId64 "\n", total_bytes); - printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, - static_cast(r->data_bytes) / total_bytes); - printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, - static_cast(r->unused_bytes) / total_bytes); - - // block statistics - printf(" total blocks: %20" PRId64 "\n", total_blocks); - printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, - static_cast(r->data_blocks) / total_blocks); - printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, - static_cast(r->unused_blocks) / total_blocks); - - // misc - printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); - } -} - -static void print_result(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; - const 
TOKU_DB_FRAGMENTATION_S *end = &report.end; - - uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; - uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; - if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { - if (verbose) { - printf("\n"); - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - } - return; - } - printf("\n"); - if (verbose) { - print_result_verbose(allocator_id, strategy, report); - } else { - printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", - strategy_to_cstring(strategy), allocator_id, - static_cast(report.end.data_bytes) / total_end_bytes, - static_cast(report.beginning.data_bytes) / total_beginning_bytes); - } -} - -static int only_aggregate_reports; - -static struct option getopt_options[] = { - { "verbose", no_argument, &verbose, 1 }, - { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, - { "include-strategy", required_argument, nullptr, 'i' }, - { "exclude-strategy", required_argument, nullptr, 'x' }, - { nullptr, 0, nullptr, 0 }, -}; - -int main(int argc, char *argv[]) { - int opt; - set candidate_strategies, excluded_strategies; - while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { - switch (opt) { - case 0: - break; - case 'i': - candidate_strategies.insert(cstring_to_strategy(optarg)); - break; - case 'x': - excluded_strategies.insert(cstring_to_strategy(optarg)); - break; - case '?': - default: - abort(); - }; - } - // Default to everything if nothing was explicitly included. 
- if (candidate_strategies.empty()) { - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); - } - // ..but remove anything that was explicitly excluded - for (set::const_iterator it = excluded_strategies.begin(); - it != excluded_strategies.end(); it++) { - candidate_strategies.erase(*it); - } - - // Run the real trace - // - // First, read the raw trace from stdin - vector canonicalized_trace = canonicalize_trace_from(stdin); - - if (!only_aggregate_reports) { - printf("\n"); - printf("Individual reports, by allocator:\n"); - } - - struct canonical_trace_stats stats; - map reports_by_strategy; - for (set::const_iterator it = candidate_strategies.begin(); - it != candidate_strategies.end(); it++) { - const block_allocator::allocation_strategy strategy(*it); - - // replay the canonicalized trace against the current strategy. - // - // we provided the allocator map so we can gather statistics later - struct canonical_trace_stats dummy_stats; - map reports; - replay_canonicalized_trace(canonicalized_trace, strategy, &reports, - // Only need to gather canonical trace stats once - it == candidate_strategies.begin() ? 
&stats : &dummy_stats); - - struct fragmentation_report aggregate_report; - memset(&aggregate_report, 0, sizeof(aggregate_report)); - for (map::iterator rp = reports.begin(); - rp != reports.end(); rp++) { - const struct fragmentation_report &report = rp->second; - aggregate_report.merge(report); - if (!only_aggregate_reports) { - print_result(rp->first, strategy, report); - } - } - reports_by_strategy[strategy] = aggregate_report; - } - - printf("\n"); - printf("Aggregate reports, by strategy:\n"); - - for (map::iterator it = reports_by_strategy.begin(); - it != reports_by_strategy.end(); it++) { - print_result(0, it->first, it->second); - } - - printf("\n"); - printf("Overall trace stats:\n"); - printf("\n"); - printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); - printf(" n_create: %15" PRIu64 "\n", stats.n_create); - printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); - printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); - printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); - printf(" n_free: %15" PRIu64 "\n", stats.n_free); - printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); - printf("\n"); - printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); - printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); - printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); - printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); - printf("\n"); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc index 5920be8deda..2324249ba00 100644 --- a/storage/tokudb/PerconaFT/tools/ftverify.cc +++ b/storage/tokudb/PerconaFT/tools/ftverify.cc @@ -148,7 +148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } } { - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = 
BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, header_1_off, diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc index 23ef72218ac..f6d777b4161 100644 --- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc +++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc @@ -192,6 +192,7 @@ static void dump_header(FT ft) { dump_descriptor(&ft->descriptor); printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows); printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); + printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows); } static int64_t getRootNode(FT ft) { diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc index 48ff28e89af..76b1d9c713e 100644 --- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc @@ -110,7 +110,7 @@ test2 (void) { static void test3 (void) -// Compare the simple version to the highly optimized verison. +// Compare the simple version to the highly optimized version. { const int datalen = 1000; char data[datalen]; diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 25f77301696..a77f46de9d0 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -369,17 +369,17 @@ void TOKUDB_SHARE::update_row_count( pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100); if (_row_delta_activity >= pct_of_rows_changed_to_trigger) { char msg[200]; - snprintf( - msg, - sizeof(msg), - "TokuDB: Auto %s background analysis for %s, delta_activity " - "%llu is greater than %llu percent of %llu rows.", - tokudb::sysvars::analyze_in_background(thd) > 0 ? 
- "scheduling" : "running", - full_table_name(), - _row_delta_activity, - auto_threshold, - (ulonglong)(_rows)); + snprintf(msg, + sizeof(msg), + "TokuDB: Auto %s analysis for %s, delta_activity %llu is " + "greater than %llu percent of %llu rows.", + tokudb::sysvars::analyze_in_background(thd) > 0 + ? "scheduling background" + : "running foreground", + full_table_name(), + _row_delta_activity, + auto_threshold, + (ulonglong)(_rows)); // analyze_standard will unlock _mutex regardless of success/failure int ret = analyze_standard(thd, NULL); @@ -4096,7 +4096,7 @@ int ha_tokudb::write_row(uchar * record) { goto cleanup; } if (curr_num_DBs == 1) { - error = insert_row_to_main_dictionary(record,&prim_key, &row, txn); + error = insert_row_to_main_dictionary(record, &prim_key, &row, txn); if (error) { goto cleanup; } } else { error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd); @@ -6130,7 +6130,7 @@ int ha_tokudb::info(uint flag) { // we should always have a primary key assert_always(share->file != NULL); - error = estimate_num_rows(share->file,&num_rows, txn); + error = estimate_num_rows(share->file, &num_rows, txn); if (error == 0) { share->set_row_count(num_rows, false); stats.records = num_rows; diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc index db3d6c112d4..6d8e7173c8d 100644 --- a/storage/tokudb/ha_tokudb_admin.cc +++ b/storage/tokudb/ha_tokudb_admin.cc @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. 
@@ -43,13 +43,11 @@ public: virtual ~recount_rows_t(); virtual const char* key(); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -64,6 +62,8 @@ private: ulonglong _throttle; // for recount rows status reporting + char _parameters[256]; + char _status[1024]; int _result; ulonglong _recount_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -78,7 +78,6 @@ private: uint64_t deleted, void* extra); int analyze_recount_rows_progress(uint64_t count, uint64_t deleted); - void get_analyze_status(char*); }; void* recount_rows_t::operator new(size_t sz) { @@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t( } _throttle = tokudb::sysvars::analyze_throttle(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_THROTTLE=%llu;", + _throttle); + _status[0] = '\0'; } recount_rows_t::~recount_rows_t() { } void recount_rows_t::on_run() { + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); _recount_start = tokudb::time::microsec(); _total_elapsed_time = 0; @@ -171,6 +179,8 @@ void recount_rows_t::on_run() { _result, _share->row_count()); error: + if(_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; } void recount_rows_t::on_destroy() { @@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() { const char* recount_rows_t::key() { return _share->full_table_name(); } -void recount_rows_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"); - sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle); - get_analyze_status(status); +const char* 
recount_rows_t::database() { + return _share->database_name(); +} +const char* recount_rows_t::table() { + return _share->table_name(); +} +const char* recount_rows_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"; + return type; +} +const char* recount_rows_t::parameters() { + return _parameters; +} +const char* recount_rows_t::status() { + return _status; } int recount_rows_t::analyze_recount_rows_progress( uint64_t count, @@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress( return ER_ABORTING_CONNECTION; } + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. + // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. + // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status + tokudb::background::_job_manager->lock(); + snprintf(_status, + sizeof(_status), + "recount_rows %s.%s counted %llu rows and %llu deleted " + "in %llu seconds.", + _share->database_name(), + _share->table_name(), + _rows, + _deleted_rows, + _total_elapsed_time / tokudb::time::MICROSECONDS); + tokudb::background::_job_manager->unlock(); + // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); - } + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); // throttle // given the throttle value, lets calculate the maximum number of rows @@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress( } return 0; } -void recount_rows_t::get_analyze_status(char* msg) { - sprintf( - msg, - "recount_rows %s.%s counted %llu rows and %llu deleted in %llu " - "seconds.", - 
_share->database_name(), - _share->table_name(), - _rows, - _deleted_rows, - _total_elapsed_time / tokudb::time::MICROSECONDS); -} - class standard_t : public tokudb::background::job_manager_t::job_t { public: @@ -261,13 +282,11 @@ public: virtual ~standard_t(); virtual const char* key(void); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -284,6 +303,8 @@ private: double _delete_fraction; // for analyze status reporting, may also use other state + char _parameters[256]; + char _status[1024]; int _result; ulonglong _analyze_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -305,7 +326,6 @@ private: uint64_t deleted_rows); bool analyze_standard_cursor_callback(uint64_t deleted_rows); - void get_analyze_status(char*); int analyze_key_progress(); int analyze_key(uint64_t* rec_per_key_part); }; @@ -351,6 +371,16 @@ standard_t::standard_t( _time_limit = tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS; _delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " + "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", + _delete_fraction, + _time_limit / tokudb::time::MICROSECONDS, + _throttle); + + _status[0] = '\0'; } standard_t::~standard_t() { } @@ -358,6 +388,10 @@ void standard_t::on_run() { DB_BTREE_STAT64 stat64; uint64_t rec_per_key_part[_share->_max_key_parts]; uint64_t total_key_parts = 0; + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); + _analyze_start = tokudb::time::microsec(); _half_time = _time_limit > 0 ? 
_time_limit/2 : 0; @@ -395,7 +429,7 @@ void standard_t::on_run() { _result = HA_ADMIN_FAILED; } if (_thd && (_result == HA_ADMIN_FAILED || - (double)_deleted_rows > + static_cast(_deleted_rows) > _delete_fraction * (_rows + _deleted_rows))) { char name[256]; int namelen; @@ -460,8 +494,9 @@ cleanup: } error: + if (_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; - } void standard_t::on_destroy() { _share->lock(); @@ -472,24 +507,21 @@ void standard_t::on_destroy() { const char* standard_t::key() { return _share->full_table_name(); } -void standard_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD"); - sprintf( - params, - "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " - "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", - _delete_fraction, - _time_limit / tokudb::time::MICROSECONDS, - _throttle); - get_analyze_status(status); +const char* standard_t::database() { + return _share->database_name(); +} +const char* standard_t::table() { + return _share->table_name(); +} +const char* standard_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD"; + return type; +} +const char* standard_t::parameters() { + return _parameters; +} +const char* standard_t::status() { + return _status; } bool standard_t::analyze_standard_cursor_callback( void* extra, @@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) { _ticks += deleted_rows; return analyze_key_progress() != 0; } -void standard_t::get_analyze_status(char* msg) { - static const char* scan_direction_str[] = { - "not scanning", - "scanning forward", - "scanning backward", - "scan unknown" - }; - - const char* scan_direction = NULL; - switch (_scan_direction) { - case 0: scan_direction = scan_direction_str[0]; break; - case DB_NEXT: scan_direction = scan_direction_str[1]; break; - case 
DB_PREV: scan_direction = scan_direction_str[2]; break; - default: scan_direction = scan_direction_str[3]; break; - } - - float progress_rows = 0.0; - if (_share->row_count() > 0) - progress_rows = (float) _rows / (float) _share->row_count(); - float progress_time = 0.0; - if (_time_limit > 0) - progress_time = (float) _key_elapsed_time / (float) _time_limit; - sprintf( - msg, - "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, " - "%s", - _share->database_name(), - _share->table_name(), - _share->_key_descriptors[_current_key]._name, - _current_key, - _share->_keys, - progress_rows * 100.0, - progress_time * 100.0, - scan_direction); -} int standard_t::analyze_key_progress(void) { if (_ticks > 1000) { _ticks = 0; @@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) { if ((_thd && thd_killed(_thd)) || cancelled()) { // client killed return ER_ABORTING_CONNECTION; - } else if(_time_limit > 0 && - (uint64_t)_key_elapsed_time > _time_limit) { + } else if (_time_limit > 0 && + static_cast(_key_elapsed_time) > _time_limit) { // time limit reached return ETIME; } - // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. + // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. + // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status. 
+ static const char* scan_direction_str[] = {"not scanning", + "scanning forward", + "scanning backward", + "scan unknown"}; + + const char* scan_direction = NULL; + switch (_scan_direction) { + case 0: + scan_direction = scan_direction_str[0]; + break; + case DB_NEXT: + scan_direction = scan_direction_str[1]; + break; + case DB_PREV: + scan_direction = scan_direction_str[2]; + break; + default: + scan_direction = scan_direction_str[3]; + break; } + float progress_rows = 0.0; + if (_share->row_count() > 0) + progress_rows = static_cast(_rows) / + static_cast(_share->row_count()); + float progress_time = 0.0; + if (_time_limit > 0) + progress_time = static_cast(_key_elapsed_time) / + static_cast(_time_limit); + tokudb::background::_job_manager->lock(); + snprintf( + _status, + sizeof(_status), + "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% " + "time, %s", + _share->database_name(), + _share->table_name(), + _share->_key_descriptors[_current_key]._name, + _current_key, + _share->_keys, + progress_rows * 100.0, + progress_time * 100.0, + scan_direction); + tokudb::background::_job_manager->unlock(); + + // report + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); + // throttle // given the throttle value, lets calculate the maximum number of rows // we should have seen so far in a .1 sec resolution @@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) { assert_always(close_error == 0); done: + // in case we timed out (bunch of deleted records) without hitting a + // single row + if (_rows == 0) + _rows = 1; + // return cardinality for (uint64_t i = 0; i < num_key_parts; i++) { rec_per_key_part[i] = _rows / unique_rows[i]; @@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) { assert_always(thd != NULL); - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); int result = HA_ADMIN_OK; tokudb::analyze::recount_rows_t* job @@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* 
thd,DB_TXN* txn) { result = HA_ADMIN_FAILED; } - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } @@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { TOKUDB_HANDLER_DBUG_RETURN(result); } - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); - tokudb::analyze::standard_t* job = new tokudb::analyze::standard_t(txn == NULL ? false : true, thd, this, txn); @@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { lock(); - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h index 0269c47ffa3..83f8a8a21d2 100644 --- a/storage/tokudb/hatoku_defines.h +++ b/storage/tokudb/hatoku_defines.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -232,9 +232,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // mysql 5.6.15 removed the test macro, so we define our own #define tokudb_test(e) ((e) ? 
1 : 0) -inline const char* tokudb_thd_get_proc_info(const THD *thd) { +inline const char* tokudb_thd_get_proc_info(const THD* thd) { return thd->proc_info; } +inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) { + thd_proc_info(thd, proc_info); +} // uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead inline uint tokudb_uint3korr(const uchar *a) { diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result index 5282f0ec9ae..8b24c3c9f72 100644 --- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result +++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result @@ -25,7 +25,7 @@ TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_statu `scheduler` varchar(32) NOT NULL DEFAULT '', `scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', `started_time` datetime DEFAULT NULL, - `status` varchar(256) DEFAULT NULL + `status` varchar(1024) DEFAULT NULL ) ENGINE=MEMORY DEFAULT CHARSET=utf8 create table t1 (a int not null auto_increment, b int, c int, primary key(a), key kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c)); insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test index 6100d9aeec2..8b6df4966f4 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test @@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`; create table foo (a int, b int); create table bar (a int, key(a)); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source 
include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test index e1acea13ed7..53c1037b051 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test @@ -15,33 +15,11 @@ create table bar (a int); alter table foo drop column a; alter table bar add column b int, add column c int; -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git 
a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test index 17a124249da..0421b8e9d26 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test @@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB; alter table foo drop index b; alter table bar add index (a); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test index 42dbb30058a..4c40339be5a 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test @@ -7,17 +7,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. 
---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2; @@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. 
---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test index 3903c2cef9f..0340b960fa5 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test @@ -6,17 +6,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. 
---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (id int, x int, primary key(id), key(x)); @@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. 
---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/mysql-test/tokudb_rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/tokudb_rpl/r/rpl_foreign_key_tokudb.result deleted file mode 100644 index cdab171d413..00000000000 --- a/storage/tokudb/mysql-test/tokudb_rpl/r/rpl_foreign_key_tokudb.result +++ /dev/null @@ -1,58 +0,0 @@ -include/master-slave.inc -Warnings: -Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. -Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
-[connection master] -CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB; -CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB; -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t1 VALUES (10); -INSERT INTO t1 VALUES (NULL),(NULL),(NULL); -INSERT INTO t2 VALUES (5,0); -INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID()); -SET FOREIGN_KEY_CHECKS=1; -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -include/sync_slave_sql_with_master.inc -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -SET TIMESTAMP=1000000000; -CREATE TABLE t3 ( a INT UNIQUE ); -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t3 VALUES (1),(1); -Got one of the listed errors -include/sync_slave_sql_with_master.inc -SET FOREIGN_KEY_CHECKS=0; -DROP TABLE IF EXISTS t1,t2,t3; -SET FOREIGN_KEY_CHECKS=1; -include/sync_slave_sql_with_master.inc -create table t1 (b int primary key) engine = TokuDB; -create table t2 (a int primary key, b int, foreign key (b) references t1(b)) -engine = TokuDB; -insert into t1 set b=1; -insert into t2 set a=1, b=1; -set foreign_key_checks=0; -delete from t1; -must sync w/o a problem (could not with the buggy code) -include/sync_slave_sql_with_master.inc -select count(*) from t1 /* must be zero */; -count(*) -0 -drop table t2,t1; -include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/tokudb_rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/tokudb_rpl/t/rpl_foreign_key_tokudb.test deleted file mode 100644 index d798cfd4a62..00000000000 --- a/storage/tokudb/mysql-test/tokudb_rpl/t/rpl_foreign_key_tokudb.test +++ /dev/null @@ -1,4 +0,0 @@ --- source include/not_ndb_default.inc --- source include/have_tokudb.inc -let $engine_type=TokuDB; --- source extra/rpl_tests/rpl_foreign_key.test diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc index d8ef54a5972..e019e41c788 100644 --- a/storage/tokudb/tokudb_background.cc 
+++ b/storage/tokudb/tokudb_background.cc @@ -8,7 +8,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -68,7 +68,8 @@ void job_manager_t::destroy() { while (_background_jobs.size()) { _mutex.lock(); job_t* job = _background_jobs.front(); - cancel(job); + if (!job->cancelled()) + cancel(job); _background_jobs.pop_front(); delete job; _mutex.unlock(); @@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) { it != _background_jobs.end(); it++) { job_t* job = *it; - if (!job->cancelled() && - strcmp(job->key(), key) == 0) { - + if (!job->cancelled() && strcmp(job->key(), key) == 0) { cancel(job); - ret = true; } } @@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) { } void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { - char database[256], table[256], type[256], params[256], status[256]; - _mutex.lock(); for (jobs_t::const_iterator it = _background_jobs.begin(); @@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { it++) { job_t* job = *it; if (!job->cancelled()) { - database[0] = table[0] = type[0] = params[0] = status[0] = '\0'; - job->status(database, table, type, params, status); - callback( - job->id(), - database, - table, - type, - params, - status, - job->user_scheduled(), - job->scheduled_time(), - job->started_time(), - extra); + callback(job, extra); } } @@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) { } void job_manager_t::cancel(job_t* job) { assert_debug(_mutex.is_owned_by_me()); + assert_always(!job->cancelled()); job->cancel(); } job_manager_t* _job_manager = NULL; diff --git a/storage/tokudb/tokudb_background.h 
b/storage/tokudb/tokudb_background.h index 3786701fd0f..29991ab325d 100644 --- a/storage/tokudb/tokudb_background.h +++ b/storage/tokudb/tokudb_background.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -58,13 +58,20 @@ public: // (or jobs) usually used to find jobs to cancel virtual const char* key() = 0; - // method to get info for information schema, 255 chars per buffer - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status) = 0; + // method to obtain the database name the job is scheduled on + virtual const char* database() = 0; + + // method to obtain the table name the job is scheduled on + virtual const char* table() = 0; + + // method to obtain the type of job + virtual const char* type() = 0; + + // method to obtain a stringized list of job parameters + virtual const char* parameters() = 0; + + // method to obtain a sting identifying the current status of the job + virtual const char* status() = 0; inline bool running() const; @@ -99,17 +106,7 @@ public: }; // pfn for iterate callback - typedef void (*pfn_iterate_t)( - uint64_t, - const char*, - const char*, - const char*, - const char*, - const char*, - bool, - time_t, - time_t, - void*); + typedef void (*pfn_iterate_t)(class job_t*, void*); public: void* operator new(size_t sz); @@ -144,6 +141,11 @@ public: // data passed when the job was scheduled void iterate_jobs(pfn_iterate_t callback, void* extra) const; + // lock the bjm, this prevents anyone from running, cancelling or iterating + // jobs in the bjm. 
+ inline void lock(); + inline void unlock(); + private: static void* thread_func(void* v); @@ -170,6 +172,15 @@ extern job_manager_t* _job_manager; bool initialize(); bool destroy(); +inline void job_manager_t::lock() { + assert_debug(!_mutex.is_owned_by_me()); + _mutex.lock(); +} +inline void job_manager_t::unlock() { + assert_debug(_mutex.is_owned_by_me()); + _mutex.unlock(); +} + inline void job_manager_t::job_t::run() { if (!_cancelled) { _running = true; diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc index 1d4ca2e0181..6cdd9b275fb 100644 --- a/storage/tokudb/tokudb_information_schema.cc +++ b/storage/tokudb/tokudb_information_schema.cc @@ -1083,7 +1083,7 @@ ST_FIELD_INFO background_job_status_field_info[] = { {"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE }, {"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, - {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, + {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1093,15 +1093,7 @@ struct background_job_status_extra { }; void background_job_status_callback( - uint64_t id, - const char* database_name, - const char* table_name, - const char* type, - const char* params, - const char* status, - bool user_scheduled, - time_t scheduled_time, - time_t started_time, + tokudb::background::job_manager_t::job_t* job, void* extra) { background_job_status_extra* e = @@ -1109,24 +1101,33 @@ void background_job_status_callback( THD* thd = e->thd; TABLE* table = e->table; + const char* tmp = NULL; - table->field[0]->store(id, false); - table->field[1]->store( - database_name, - strlen(database_name), - system_charset_info); - table->field[2]->store(table_name, strlen(table_name), system_charset_info); - table->field[3]->store(type, 
strlen(type), system_charset_info); - table->field[4]->store(params, strlen(params), system_charset_info); - if (user_scheduled) + table->field[0]->store(job->id(), false); + + tmp = job->database(); + table->field[1]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->table(); + table->field[2]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->type(); + table->field[3]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->parameters(); + table->field[4]->store(tmp, strlen(tmp), system_charset_info); + + if (job->user_scheduled()) table->field[5]->store("USER", strlen("USER"), system_charset_info); else table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info); - field_store_time_t(table->field[6], scheduled_time); - field_store_time_t(table->field[7], started_time); - if (status[0] != '\0') { - table->field[8]->store(status, strlen(status), system_charset_info); + field_store_time_t(table->field[6], job->scheduled_time()); + field_store_time_t(table->field[7], job->started_time()); + + tmp = job->status(); + if (tmp && tmp[0] != '\0') { + table->field[8]->store(tmp, strlen(tmp), system_charset_info); table->field[8]->set_notnull(); } else { table->field[8]->store(NULL, 0, system_charset_info); From 93ab3093cb6646834844139ef51c51e2c84b73a6 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 27 Sep 2016 18:00:59 +0200 Subject: [PATCH 53/96] 5.6.32-78.1 --- storage/xtradb/btr/btr0btr.cc | 16 ++-- storage/xtradb/btr/btr0cur.cc | 4 +- storage/xtradb/buf/buf0flu.cc | 2 + storage/xtradb/fil/fil0fil.cc | 6 ++ storage/xtradb/fts/fts0fts.cc | 67 ++++++++++++--- storage/xtradb/fts/fts0opt.cc | 2 +- storage/xtradb/handler/ha_innodb.cc | 121 ++++++++++++++++++++++------ storage/xtradb/handler/i_s.cc | 39 ++++++++- storage/xtradb/ibuf/ibuf0ibuf.cc | 4 +- storage/xtradb/include/buf0buf.h | 12 +++ storage/xtradb/include/buf0buf.ic | 14 ++++ storage/xtradb/include/fts0fts.h | 4 +- storage/xtradb/include/srv0srv.h | 10 ++- 
storage/xtradb/include/univ.i | 2 +- storage/xtradb/log/log0log.cc | 4 +- storage/xtradb/log/log0online.cc | 12 ++- storage/xtradb/log/log0recv.cc | 17 ++-- storage/xtradb/row/row0merge.cc | 2 +- storage/xtradb/srv/srv0mon.cc | 7 +- storage/xtradb/srv/srv0srv.cc | 15 ++-- 20 files changed, 280 insertions(+), 80 deletions(-) diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc index 95b8892415c..ecea98fccfe 100644 --- a/storage/xtradb/btr/btr0btr.cc +++ b/storage/xtradb/btr/btr0btr.cc @@ -78,7 +78,7 @@ btr_corruption_report( buf_block_get_zip_size(block), BUF_PAGE_PRINT_NO_CRASH); } - buf_page_print(buf_block_get_frame(block), 0, 0); + buf_page_print(buf_nonnull_block_get_frame(block), 0, 0); } #ifndef UNIV_HOTBACKUP @@ -804,8 +804,10 @@ btr_height_get( /* S latches the page */ root_block = btr_root_block_get(index, RW_S_LATCH, mtr); + ut_ad(root_block); // The index must not be corrupted - height = btr_page_get_level(buf_block_get_frame(root_block), mtr); + height = btr_page_get_level(buf_nonnull_block_get_frame(root_block), + mtr); /* Release the S latch on the root page. 
*/ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX); @@ -1231,7 +1233,7 @@ btr_get_size( SRV_CORRUPT_TABLE_CHECK(root, { mtr_commit(mtr); - return(0); + return(ULINT_UNDEFINED); }); if (flag == BTR_N_LEAF_PAGES) { @@ -2756,7 +2758,7 @@ btr_attach_half_pages( } /* Get the level of the split pages */ - level = btr_page_get_level(buf_block_get_frame(block), mtr); + level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr); ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block), mtr)); @@ -4133,8 +4135,10 @@ btr_discard_page( /* Decide the page which will inherit the locks */ - left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr); - right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr); + left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block), + mtr); + right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block), + mtr); if (left_page_no != FIL_NULL) { merge_block = btr_block_get(space, zip_size, left_page_no, diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index c1efe9ca91c..753309bdeec 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -1751,7 +1751,7 @@ btr_cur_pessimistic_insert( } if (!page_rec_is_infimum(btr_cur_get_rec(cursor)) || btr_page_get_prev( - buf_block_get_frame( + buf_nonnull_block_get_frame( btr_cur_get_block(cursor)), mtr) == FIL_NULL) { /* split and inserted need to call @@ -2220,7 +2220,7 @@ func_exit: if (page_zip && !(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) - && page_is_leaf(buf_block_get_frame(block))) { + && page_is_leaf(buf_nonnull_block_get_frame(block))) { /* Update the free bits in the insert buffer. 
*/ ibuf_update_free_bits_zip(block, mtr); } diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 03504b15599..14a5fbde7e8 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -309,6 +309,8 @@ buf_flush_init_flush_rbt(void) buf_flush_list_mutex_enter(buf_pool); + ut_ad(buf_pool->flush_rbt == NULL); + /* Create red black tree for speedy insertions in flush list. */ buf_pool->flush_rbt = rbt_create( sizeof(buf_page_t*), buf_flush_block_cmp); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 75bb811198a..c1dbb5f91b9 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1822,6 +1822,9 @@ fil_close_all_files(void) { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); @@ -1861,6 +1864,9 @@ fil_close_log_files( { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 22278338072..25059db96b0 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -265,13 +265,15 @@ FTS auxiliary INDEX table and clear the cache at the end. 
@param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -3566,7 +3568,7 @@ fts_add_doc_by_id( DBUG_EXECUTE_IF( "fts_instrument_sync_debug", - fts_sync(cache->sync, true, true); + fts_sync(cache->sync, true, true, false); ); DEBUG_SYNC_C("fts_instrument_sync_request"); @@ -4378,13 +4380,11 @@ fts_sync_index( } /** Check if index cache has been synced completely -@param[in,out] sync sync state @param[in,out] index_cache index cache @return true if index is synced, otherwise false. */ static bool fts_sync_index_check( - fts_sync_t* sync, fts_index_cache_t* index_cache) { const ib_rbt_node_t* rbt_node; @@ -4407,14 +4407,36 @@ fts_sync_index_check( return(true); } -/*********************************************************************//** -Commit the SYNC, change state of processed doc ids etc. +/** Reset synced flag in index cache when rollback +@param[in,out] index_cache index cache */ +static +void +fts_sync_index_reset( + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + fts_node_t* fts_node; + fts_node = static_cast(ib_vector_last(word->nodes)); + + fts_node->synced = false; + } +} + +/** Commit the SYNC, change state of processed doc ids etc. 
+@param[in,out] sync sync state @return DB_SUCCESS if all OK */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t fts_sync_commit( -/*============*/ - fts_sync_t* sync) /*!< in: sync state */ + fts_sync_t* sync) { dberr_t error; trx_t* trx = sync->trx; @@ -4467,6 +4489,8 @@ fts_sync_commit( (double) n_nodes/ (double) elapsed_time); } + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); return(error); @@ -4489,6 +4513,10 @@ fts_sync_rollback( index_cache = static_cast( ib_vector_get(cache->indexes, i)); + /* Reset synced flag so nodes will not be skipped + in the next sync, see fts_sync_write_words(). */ + fts_sync_index_reset(index_cache); + for (j = 0; fts_index_selector[j].value; ++j) { if (index_cache->ins_graph[j] != NULL) { @@ -4514,6 +4542,9 @@ fts_sync_rollback( rw_lock_x_unlock(&cache->lock); fts_sql_rollback(trx); + + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); } @@ -4522,13 +4553,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { ulint i; dberr_t error = DB_SUCCESS; @@ -4557,6 +4590,12 @@ fts_sync( DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); + /* When sync in background, we hold dict operation lock + to prevent DDL like DROP INDEX, etc. 
*/ + if (has_dict) { + sync->trx->dict_operation_lock_mode = RW_S_LATCH; + } + begin_sync: if (cache->total_size > fts_max_cache_size) { /* Avoid the case: sync never finish when @@ -4597,7 +4636,7 @@ begin_sync: ib_vector_get(cache->indexes, i)); if (index_cache->index->to_be_dropped - || fts_sync_index_check(sync, index_cache)) { + || fts_sync_index_check(index_cache)) { continue; } @@ -4612,6 +4651,7 @@ end_sync: } rw_lock_x_lock(&cache->lock); + sync->interrupted = false; sync->in_progress = false; os_event_set(sync->event); rw_lock_x_unlock(&cache->lock); @@ -4635,20 +4675,23 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { dberr_t err = DB_SUCCESS; ut_ad(table->fts); if (!dict_table_is_discarded(table) && table->fts->cache) { - err = fts_sync(table->fts->cache->sync, unlock_cache, wait); + err = fts_sync(table->fts->cache->sync, + unlock_cache, wait, has_dict); } return(err); diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index 1cf45961ae2..0d45a195c95 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -2986,7 +2986,7 @@ fts_optimize_sync_table( if (table) { if (dict_table_has_fts_index(table) && table->fts->cache) { - fts_sync_table(table, true, false); + fts_sync_table(table, true, false, true); } dict_table_close(table, FALSE, FALSE); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index c492bfcdd1f..f00d11bd870 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -701,6 +701,19 @@ innobase_is_fake_change( THD* thd); /*!< in: MySQL 
thread handle of the user for whom the transaction is being committed */ +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list); /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code @@ -3405,6 +3418,7 @@ innobase_init( innobase_hton->purge_changed_page_bitmaps = innobase_purge_changed_page_bitmaps; innobase_hton->is_fake_change = innobase_is_fake_change; + innobase_hton->get_parent_fk_list = innobase_get_parent_fk_list; innobase_hton->kill_connection = innobase_kill_connection; @@ -7294,6 +7308,7 @@ dberr_t ha_innobase::innobase_lock_autoinc(void) /*====================================*/ { + DBUG_ENTER("ha_innobase::innobase_lock_autoinc"); dberr_t error = DB_SUCCESS; ut_ad(!srv_read_only_mode); @@ -7328,6 +7343,8 @@ ha_innobase::innobase_lock_autoinc(void) /* Fall through to old style locking. 
*/ case AUTOINC_OLD_STYLE_LOCKING: + DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", + ut_ad(0);); error = row_lock_table_autoinc_for_mysql(prebuilt); if (error == DB_SUCCESS) { @@ -7341,7 +7358,7 @@ ha_innobase::innobase_lock_autoinc(void) ut_error; } - return(error); + DBUG_RETURN(error); } /********************************************************************//** @@ -12287,7 +12304,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (prebuilt->table->fts && prebuilt->table->fts->cache && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table, false, true); + fts_sync_table(prebuilt->table, false, true, false); fts_optimize_table(prebuilt->table); } return(HA_ADMIN_OK); @@ -12454,7 +12471,14 @@ ha_innobase::check( prebuilt->select_lock_type = LOCK_NONE; - if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + bool check_result + = row_check_index_for_mysql(prebuilt, index, &n_rows); + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!(index->type & DICT_CLUSTERED)) { + check_result = false; + }); + if (!check_result) { innobase_format_name( index_name, sizeof index_name, index->name, TRUE); @@ -12807,6 +12831,75 @@ get_foreign_key_info( return(pf_key_info); } +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys */ +static +void +fill_foreign_key_list(THD* thd, + const dict_table_t* table, + List* f_key_list) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); ++it) { + + dict_foreign_t* foreign = *it; + + FOREIGN_KEY_INFO* pf_key_info + = get_foreign_key_info(thd, foreign); + if (pf_key_info) { + f_key_list->push_back(pf_key_info); + } + } +} + +/** Get the list of foreign keys referencing a specified table +table. 
+@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list) +{ + ut_a(strlen(path) <= FN_REFLEN); + char norm_name[FN_REFLEN + 1]; + normalize_table_name(norm_name, path); + + trx_t* parent_trx = check_trx_exists(thd); + parent_trx->op_info = "getting list of referencing foreign keys"; + trx_search_latch_release_if_reserved(parent_trx); + + mutex_enter(&dict_sys->mutex); + + dict_table_t* table + = dict_table_open_on_name(norm_name, TRUE, FALSE, + static_cast( + DICT_ERR_IGNORE_INDEX_ROOT + | DICT_ERR_IGNORE_CORRUPT)); + if (!table) { + mutex_exit(&dict_sys->mutex); + return(HA_ERR_NO_SUCH_TABLE); + } + + fill_foreign_key_list(thd, table, f_key_list); + + dict_table_close(table, TRUE, FALSE); + + mutex_exit(&dict_sys->mutex); + parent_trx->op_info = ""; + return(0); +} + /*******************************************************************//** Gets the list of foreign keys in this table. 
@return always 0, that is, always succeeds */ @@ -12859,9 +12952,6 @@ ha_innobase::get_parent_foreign_key_list( THD* thd, /*!< in: user thread handle */ List* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - ut_a(prebuilt != NULL); update_thd(ha_thd()); @@ -12870,20 +12960,7 @@ ha_innobase::get_parent_foreign_key_list( trx_search_latch_release_if_reserved(prebuilt->trx); mutex_enter(&(dict_sys->mutex)); - - for (dict_foreign_set::iterator it - = prebuilt->table->referenced_set.begin(); - it != prebuilt->table->referenced_set.end(); - ++it) { - - foreign = *it; - - pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } - + fill_foreign_key_list(thd, prebuilt->table, f_key_list); mutex_exit(&(dict_sys->mutex)); prebuilt->trx->op_info = ""; @@ -16597,7 +16674,6 @@ innodb_track_changed_pages_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming bool */ { - static bool enabled_on_startup = false; long long intbuf = 0; if (value->val_int(value, &intbuf)) { @@ -16605,8 +16681,7 @@ innodb_track_changed_pages_validate( return 1; } - if (srv_track_changed_pages || enabled_on_startup) { - enabled_on_startup = true; + if (srv_redo_log_thread_started) { *reinterpret_cast(save) = static_cast(intbuf); return 0; diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 4bc834479fe..dfdad55ec3b 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3016,15 +3016,26 @@ i_s_fts_deleted_generic_fill( DBUG_RETURN(0); } - deleted = fts_doc_ids_create(); + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + + DBUG_RETURN(0); + } else if (!dict_table_has_fts_index(user_table)) { + dict_table_close(user_table, FALSE, FALSE); + + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } + deleted = fts_doc_ids_create(); + trx = trx_allocate_for_background(); trx->op_info = "Select for FTS DELETE TABLE"; @@ -3052,6 +3063,8 @@ i_s_fts_deleted_generic_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3433,6 +3446,12 @@ i_s_fts_index_cache_fill( DBUG_RETURN(0); } + if (user_table->fts == NULL || user_table->fts->cache == NULL) { + dict_table_close(user_table, FALSE, FALSE); + + DBUG_RETURN(0); + } + cache = user_table->fts->cache; ut_a(cache); @@ -3871,10 +3890,15 @@ i_s_fts_index_table_fill( DBUG_RETURN(0); } + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3887,6 +3911,8 @@ i_s_fts_index_table_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -4026,14 +4052,21 @@ i_s_fts_config_fill( fields = table->field; + /* Prevent DDL to drop fts aux tables. 
*/ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table)) { dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -4089,6 +4122,8 @@ i_s_fts_config_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index 4334fd8c6dd..d0d47c3b87a 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -938,7 +938,7 @@ ibuf_set_free_bits_low( ulint space; ulint page_no; - if (!page_is_leaf(buf_block_get_frame(block))) { + if (!page_is_leaf(buf_nonnull_block_get_frame(block))) { return; } @@ -1113,7 +1113,7 @@ ibuf_update_free_bits_zip( page_no = buf_block_get_page_no(block); zip_size = buf_block_get_zip_size(block); - ut_a(page_is_leaf(buf_block_get_frame(block))); + ut_a(page_is_leaf(buf_nonnull_block_get_frame(block))); ut_a(zip_size); bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index b265b8661c8..9aadd7b12fd 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1060,8 +1060,20 @@ buf_block_get_frame( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ MY_ATTRIBUTE((pure)); + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. 
+@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( + const buf_block_t* block) /*!< in: pointer to the control block */ + MY_ATTRIBUTE((pure)); + #else /* UNIV_DEBUG */ # define buf_block_get_frame(block) (block ? (block)->frame : 0) +# define buf_nonnull_block_get_frame(block) ((block)->frame) #endif /* UNIV_DEBUG */ /*********************************************************************//** Gets the space id of a block. diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index b40285ae3f0..8a21f44a2ee 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -690,6 +690,19 @@ buf_block_get_frame( { SRV_CORRUPT_TABLE_CHECK(block, return(0);); + return(buf_nonnull_block_get_frame(block)); +} + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( +/*========================*/ + const buf_block_t* block) /*!< in: pointer to the control block */ +{ switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -711,6 +724,7 @@ buf_block_get_frame( ok: return((buf_frame_t*) block->frame); } + #endif /* UNIV_DEBUG */ /*********************************************************************//** diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index 68d4d333245..87b5787d416 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -840,13 +840,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. 
*/ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 95065d69974..692d339608a 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -165,8 +165,10 @@ extern os_event_t srv_checkpoint_completed_event; log tracking iteration */ extern os_event_t srv_redo_log_tracked_event; -/** srv_redo_log_follow_thread spawn flag */ -extern bool srv_redo_log_thread_started; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ +extern bool srv_redo_log_thread_started; /* If the last data file is auto-extended, we add this many pages to it at a time */ @@ -262,6 +264,10 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; + +/** Whether the redo log tracking is currently enabled. 
Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ extern my_bool srv_track_changed_pages; extern ulonglong srv_max_bitmap_file_size; diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index ffae4af4c56..4d64e3249c0 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 77.0 +#define PERCONA_INNODB_VERSION 78.1 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index e6e5762b1e9..0768bb6bb00 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -3572,7 +3572,7 @@ loop: /* Wake the log tracking thread which will then immediatelly quit because of srv_shutdown_state value */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } @@ -3651,7 +3651,7 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Signal the log following thread to quit */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 4e58755e1f2..d80cb2ad447 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -1796,20 +1796,20 @@ log_online_purge_changed_page_bitmaps( lsn = LSN_MAX; } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { /* User requests might happen with both enabled and disabled tracking */ mutex_enter(&log_bmp_sys->mutex); } if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) { - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { 
mutex_exit(&log_bmp_sys->mutex); } return TRUE; } - if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) { + if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) { /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); @@ -1842,7 +1842,7 @@ log_online_purge_changed_page_bitmaps( } } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { if (lsn > log_bmp_sys->end_lsn) { lsn_t new_file_lsn; if (lsn == LSN_MAX) { @@ -1853,9 +1853,7 @@ log_online_purge_changed_page_bitmaps( new_file_lsn = log_bmp_sys->end_lsn; } if (!log_online_rotate_bitmap_file(new_file_lsn)) { - /* If file create failed, signal the log - tracking thread to quit next time it wakes - up. */ + /* If file create failed, stop log tracking */ srv_track_changed_pages = FALSE; } } diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 23fadfb0bf2..b80f1f8597e 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -379,12 +379,6 @@ recv_sys_init( } #ifndef UNIV_HOTBACKUP - /* Initialize red-black tree for fast insertions into the - flush_list during recovery process. - As this initialization is done while holding the buffer pool - mutex we perform it before acquiring recv_sys->mutex. */ - buf_flush_init_flush_rbt(); - mutex_enter(&(recv_sys->mutex)); recv_sys->heap = mem_heap_create_typed(256, @@ -474,9 +468,6 @@ recv_sys_debug_free(void) recv_sys->last_block_buf_start = NULL; mutex_exit(&(recv_sys->mutex)); - - /* Free up the flush_rbt. */ - buf_flush_free_flush_rbt(); } # endif /* UNIV_LOG_DEBUG */ @@ -3118,6 +3109,11 @@ recv_recovery_from_checkpoint_start_func( byte* log_hdr_buf_base = static_cast (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); dberr_t err; + + /* Initialize red-black tree for fast insertions into the + flush_list during recovery process. 
*/ + buf_flush_init_flush_rbt(); + ut_when_dtor tmp(recv_sys->dblwr); log_hdr_buf = static_cast @@ -3537,6 +3533,9 @@ recv_recovery_from_checkpoint_finish(void) #ifndef UNIV_LOG_DEBUG recv_sys_debug_free(); #endif + /* Free up the flush_rbt. */ + buf_flush_free_flush_rbt(); + /* Roll back any recovered data dictionary transactions, so that the data dictionary tables will be free of any locks. The data dictionary latch should guarantee that there is at diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 0bba529d167..feb18c82ab6 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -1993,7 +1993,7 @@ wait_again: /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ err = fts_sync_table(const_cast(new_table), - false, true); + false, true, false); if (err == DB_SUCCESS) { fts_update_next_doc_id( diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 80c8f7fadbc..1aab9495644 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. 
This program is free software; you can redistribute it and/or modify it under @@ -1347,7 +1347,10 @@ srv_mon_set_module_control( module */ set_current_module = FALSE; } else if (module_id == MONITOR_ALL_COUNTER) { - continue; + if (!(innodb_counter_info[ix].monitor_type + & MONITOR_GROUP_MODULE)) { + continue; + } } else { /* Hitting the next module, stop */ break; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index cba87df32b0..1012d0ccb08 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -184,6 +184,9 @@ UNIV_INTERN char** srv_data_file_names = NULL; /* size in database pages */ UNIV_INTERN ulint* srv_data_file_sizes = NULL; +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ UNIV_INTERN my_bool srv_track_changed_pages = FALSE; UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; @@ -749,6 +752,9 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event; UNIV_INTERN os_event_t srv_redo_log_tracked_event; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ UNIV_INTERN bool srv_redo_log_thread_started = false; /*********************************************************************//** @@ -2324,13 +2330,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)( os_event_wait(srv_checkpoint_completed_event); os_event_reset(srv_checkpoint_completed_event); -#ifdef UNIV_DEBUG - if (!srv_track_changed_pages) { - continue; - } -#endif - - if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { + if (srv_track_changed_pages + && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { if (!log_online_follow_redo_log()) { /* TODO: sync with I_S log tracking status? 
*/ ib_logf(IB_LOG_LEVEL_ERROR, From 0e76054b7b5f09246f31f8927194e9782f82634b Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 28 Sep 2016 12:52:01 +0000 Subject: [PATCH 54/96] Feedback plugin : add support for Windows 10 / Server 2016. Also add fallback version string for unknown future versions. --- plugin/feedback/utils.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/plugin/feedback/utils.cc b/plugin/feedback/utils.cc index f6fcb3d2082..cff19c93ed2 100644 --- a/plugin/feedback/utils.cc +++ b/plugin/feedback/utils.cc @@ -43,7 +43,11 @@ static const char *get_os_version_name(OSVERSIONINFOEX *ver) { DWORD major = ver->dwMajorVersion; DWORD minor = ver->dwMinorVersion; - + if (major == 10 && minor == 0) + { + return (ver->wProductType == VER_NT_WORKSTATION) ? + "Windows 10" : "Windows Server 2016"; + } if (major == 6 && minor == 3) { return (ver->wProductType == VER_NT_WORKSTATION)? @@ -102,7 +106,12 @@ static int uname(struct utsname *buf) if(version_str && version_str[0]) sprintf(buf->version, "%s %s",version_str, ver.szCSDVersion); else - sprintf(buf->version, "%s", ver.szCSDVersion); + { + /* Fallback for unknown versions, e.g "Windows ." */ + sprintf(buf->version, "Windows %d.%d%s", + ver.dwMajorVersion, ver.dwMinorVersion, + (ver.wProductType == VER_NT_WORKSTATION ? "" : " Server")); + } #ifdef _WIN64 strcpy(buf->machine, "x64"); From a53f3c6d3cfa50b15b1aff26bc9479eb582d8611 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 28 Sep 2016 16:12:58 +0300 Subject: [PATCH 55/96] MDEV-10649: Optimizer sometimes use "index" instead of "range" access for UPDATE (Fixing both InnoDB and XtraDB) Re-opening a TABLE object (after e.g. FLUSH TABLES or open table cache eviction) causes ha_innobase to call dict_stats_update(DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY). 
Inside this call, the following is done: dict_stats_empty_table(table); dict_stats_copy(table, t); On the other hand, commands like UPDATE make this call to get the "rows in table" statistics in table->stats.records: ha_innobase->info(HA_STATUS_VARIABLE|HA_STATUS_NO_LOCK) note the HA_STATUS_NO_LOCK parameter. It means, no locks are taken by ::info() If the ::info() call happens between dict_stats_empty_table and dict_stats_copy calls, the UPDATE's optimizer will get an estimate of table->stats.records=1, which causes it to pick a full table scan, which in turn will take a lot of row locks and cause other bad consequences. --- storage/innobase/dict/dict0stats.cc | 29 +++++++++++++++++++---------- storage/xtradb/dict/dict0stats.cc | 29 +++++++++++++++++++---------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index b073398f8ec..a4aa43651f8 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -673,7 +673,10 @@ void dict_stats_copy( /*============*/ dict_table_t* dst, /*!< in/out: destination table */ - const dict_table_t* src) /*!< in: source table */ + const dict_table_t* src, /*!< in: source table */ + bool reset_ignored_indexes) /*!< in: if true, set ignored indexes + to have the same statistics as if + the table was empty */ { dst->stats_last_recalc = src->stats_last_recalc; dst->stat_n_rows = src->stat_n_rows; @@ -692,7 +695,16 @@ dict_stats_copy( && (src_idx = dict_table_get_next_index(src_idx)))) { if (dict_stats_should_ignore_index(dst_idx)) { - continue; + if (reset_ignored_indexes) { + /* Reset index statistics for all ignored indexes, + unless they are FT indexes (these have no statistics)*/ + if (dst_idx->type & DICT_FTS) { + continue; + } + dict_stats_empty_index(dst_idx); + } else { + continue; + } } ut_ad(!dict_index_is_univ(dst_idx)); @@ -782,7 +794,7 @@ dict_stats_snapshot_create( t = 
dict_stats_table_clone_create(table); - dict_stats_copy(t, table); + dict_stats_copy(t, table, false); t->stat_persistent = table->stat_persistent; t->stats_auto_recalc = table->stats_auto_recalc; @@ -3240,13 +3252,10 @@ dict_stats_update( dict_table_stats_lock(table, RW_X_LATCH); - /* Initialize all stats to dummy values before - copying because dict_stats_table_clone_create() does - skip corrupted indexes so our dummy object 't' may - have less indexes than the real object 'table'. */ - dict_stats_empty_table(table); - - dict_stats_copy(table, t); + /* Pass reset_ignored_indexes=true as parameter + to dict_stats_copy. This will cause statictics + for corrupted indexes to be set to empty values */ + dict_stats_copy(table, t, true); dict_stats_assert_initialized(table); diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index b073398f8ec..a4aa43651f8 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -673,7 +673,10 @@ void dict_stats_copy( /*============*/ dict_table_t* dst, /*!< in/out: destination table */ - const dict_table_t* src) /*!< in: source table */ + const dict_table_t* src, /*!< in: source table */ + bool reset_ignored_indexes) /*!< in: if true, set ignored indexes + to have the same statistics as if + the table was empty */ { dst->stats_last_recalc = src->stats_last_recalc; dst->stat_n_rows = src->stat_n_rows; @@ -692,7 +695,16 @@ dict_stats_copy( && (src_idx = dict_table_get_next_index(src_idx)))) { if (dict_stats_should_ignore_index(dst_idx)) { - continue; + if (reset_ignored_indexes) { + /* Reset index statistics for all ignored indexes, + unless they are FT indexes (these have no statistics)*/ + if (dst_idx->type & DICT_FTS) { + continue; + } + dict_stats_empty_index(dst_idx); + } else { + continue; + } } ut_ad(!dict_index_is_univ(dst_idx)); @@ -782,7 +794,7 @@ dict_stats_snapshot_create( t = dict_stats_table_clone_create(table); - dict_stats_copy(t, table); + dict_stats_copy(t, 
table, false); t->stat_persistent = table->stat_persistent; t->stats_auto_recalc = table->stats_auto_recalc; @@ -3240,13 +3252,10 @@ dict_stats_update( dict_table_stats_lock(table, RW_X_LATCH); - /* Initialize all stats to dummy values before - copying because dict_stats_table_clone_create() does - skip corrupted indexes so our dummy object 't' may - have less indexes than the real object 'table'. */ - dict_stats_empty_table(table); - - dict_stats_copy(table, t); + /* Pass reset_ignored_indexes=true as parameter + to dict_stats_copy. This will cause statictics + for corrupted indexes to be set to empty values */ + dict_stats_copy(table, t, true); dict_stats_assert_initialized(table); From 7cb79a65ba6286ac66d5ebbebea3243ef97f5c41 Mon Sep 17 00:00:00 2001 From: Arun Kuruvila Date: Wed, 28 Sep 2016 15:52:05 +0530 Subject: [PATCH 56/96] Bug#24707666: DEFAULT SETTING FOR SECURE-FILE-PRIV SHOULD BE RESTRICTED IN ALL GA RELEASES Back port of WL#6782 to 5.5 and 5.6. This also includes back port of Bug#20771331, Bug#20741572 and Bug#20770671. Bug#24695274 and Bug#24679907 are also handled along with this. 
--- cmake/install_layout.cmake | 256 +++++++++++++++++- config.h.cmake | 4 + mysql-test/include/mtr_warnings.sql | 7 +- mysql-test/include/mysqld--help.inc | 3 +- mysql-test/mysql-test-run.pl | 4 +- mysql-test/r/mysqld--help-notwin.result | 1 - mysql-test/r/mysqld--help-win.result | 1 - .../auth_sec/r/secure_file_priv_error.result | 7 + .../auth_sec/r/secure_file_priv_null.result | 21 ++ .../r/secure_file_priv_warnings.result | 17 ++ .../secure_file_priv_warnings_not_win.result | 9 + .../r/secure_file_priv_warnings_win.result | 8 + .../auth_sec/t/secure_file_priv_error.test | 39 +++ .../t/secure_file_priv_null-master.opt | 1 + .../auth_sec/t/secure_file_priv_null.test | 42 +++ .../t/secure_file_priv_warnings-master.opt | 1 + .../auth_sec/t/secure_file_priv_warnings.test | 47 ++++ .../t/secure_file_priv_warnings_not_win.test | 24 ++ .../t/secure_file_priv_warnings_win.test | 35 +++ packaging/rpm-oel/mysql-systemd-start | 6 + packaging/rpm-oel/mysql.init | 10 +- packaging/rpm-oel/mysql.spec.in | 5 + packaging/rpm-sles/mysql.spec.in | 5 + packaging/solaris/postinstall-solaris.sh | 8 +- sql/mysqld.cc | 252 +++++++++++++++-- sql/sql_class.cc | 2 + sql/sql_class.h | 1 + sql/sys_vars.cc | 8 +- support-files/mysql.spec.sh | 7 +- 29 files changed, 790 insertions(+), 41 deletions(-) create mode 100644 mysql-test/suite/auth_sec/r/secure_file_priv_error.result create mode 100644 mysql-test/suite/auth_sec/r/secure_file_priv_null.result create mode 100644 mysql-test/suite/auth_sec/r/secure_file_priv_warnings.result create mode 100644 mysql-test/suite/auth_sec/r/secure_file_priv_warnings_not_win.result create mode 100644 mysql-test/suite/auth_sec/r/secure_file_priv_warnings_win.result create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_error.test create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_null-master.opt create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_null.test create mode 100644 
mysql-test/suite/auth_sec/t/secure_file_priv_warnings-master.opt create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_warnings.test create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_warnings_not_win.test create mode 100644 mysql-test/suite/auth_sec/t/secure_file_priv_warnings_win.test diff --git a/cmake/install_layout.cmake b/cmake/install_layout.cmake index 4adda0b6eac..4fd18b049f2 100644 --- a/cmake/install_layout.cmake +++ b/cmake/install_layout.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ # and relative links. Windows zip uses the same tarball layout but without # the build prefix. # -# RPM +# RPM, SLES # Build as per default RPM layout, with prefix=/usr # Note: The layout for ULN RPMs differs, see the "RPM" section. # @@ -32,10 +32,22 @@ # SVR4 # Solaris package layout suitable for pkg* tools, prefix=/opt/mysql/mysql # +# FREEBSD, GLIBC, OSX, TARGZ +# Build with prefix=/usr/local/mysql, create tarball with install prefix="." +# and relative links. +# +# WIN +# Windows zip : same as tarball layout but without the build prefix +# # To force a directory layout, use -DINSTALL_LAYOUT=. # # The default is STANDALONE. # +# Note : At present, RPM and SLES layouts are similar. This is also true +# for layouts like FREEBSD, GLIBC, OSX, TARGZ. However, they provide +# opportunity to fine-tune deployment for each platform without +# affecting all other types of deployment. +# # There is the possibility to further fine-tune installation directories. 
# Several variables can be overwritten: # @@ -60,6 +72,7 @@ # - INSTALL_SUPPORTFILESDIR (various extra support files) # # - INSTALL_MYSQLDATADIR (data directory) +# - INSTALL_SECURE_FILE_PRIVDIR (--secure-file-priv directory) # # When changing this page, _please_ do not forget to update public Wiki # http://forge.mysql.com/wiki/CMake#Fine-tuning_installation_paths @@ -69,10 +82,11 @@ IF(NOT INSTALL_LAYOUT) ENDIF() SET(INSTALL_LAYOUT "${DEFAULT_INSTALL_LAYOUT}" -CACHE STRING "Installation directory layout. Options are: STANDALONE (as in zip or tar.gz installer), RPM, DEB, SVR4") +CACHE STRING "Installation directory layout. Options are: TARGZ (as in tar.gz installer), WIN (as in zip installer), STANDALONE, RPM, DEB, SVR4, FREEBSD, GLIBC, OSX, SLES") IF(UNIX) - IF(INSTALL_LAYOUT MATCHES "RPM") + IF(INSTALL_LAYOUT MATCHES "RPM" OR + INSTALL_LAYOUT MATCHES "SLES") SET(default_prefix "/usr") ELSEIF(INSTALL_LAYOUT MATCHES "DEB") SET(default_prefix "/opt/mysql/server-${MYSQL_BASE_VERSION}") @@ -87,7 +101,7 @@ IF(UNIX) SET(CMAKE_INSTALL_PREFIX ${default_prefix} CACHE PATH "install prefix" FORCE) ENDIF() - SET(VALID_INSTALL_LAYOUTS "RPM" "STANDALONE" "DEB" "SVR4") + SET(VALID_INSTALL_LAYOUTS "RPM" "DEB" "SVR4" "FREEBSD" "GLIBC" "OSX" "TARGZ" "SLES" "STANDALONE") LIST(FIND VALID_INSTALL_LAYOUTS "${INSTALL_LAYOUT}" ind) IF(ind EQUAL -1) MESSAGE(FATAL_ERROR "Invalid INSTALL_LAYOUT parameter:${INSTALL_LAYOUT}." @@ -99,6 +113,15 @@ IF(UNIX) MARK_AS_ADVANCED(SYSCONFDIR) ENDIF() +IF(WIN32) + SET(VALID_INSTALL_LAYOUTS "TARGZ" "STANDALONE" "WIN") + LIST(FIND VALID_INSTALL_LAYOUTS "${INSTALL_LAYOUT}" ind) + IF(ind EQUAL -1) + MESSAGE(FATAL_ERROR "Invalid INSTALL_LAYOUT parameter:${INSTALL_LAYOUT}." + " Choose between ${VALID_INSTALL_LAYOUTS}" ) + ENDIF() +ENDIF() + # # plugin_tests's value should not be used by imported plugins, # just use if(INSTALL_PLUGINTESTDIR). 
@@ -109,6 +132,22 @@ FILE(GLOB plugin_tests ${CMAKE_SOURCE_DIR}/internal/plugin/*/tests ) +# +# DEFAULT_SECURE_FILE_PRIV_DIR/DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR +# +IF(INSTALL_LAYOUT MATCHES "STANDALONE" OR + INSTALL_LAYOUT MATCHES "WIN") + SET(secure_file_priv_path "NULL") +ELSEIF(INSTALL_LAYOUT MATCHES "RPM" OR + INSTALL_LAYOUT MATCHES "SLES" OR + INSTALL_LAYOUT MATCHES "SVR4" OR + INSTALL_LAYOUT MATCHES "DEB") + SET(secure_file_priv_path "/var/lib/mysql-files") +ELSE() + SET(secure_file_priv_path "${default_prefix}/mysql-files") +ENDIF() +SET(secure_file_priv_embedded_path "NULL") + # # STANDALONE layout # @@ -134,6 +173,148 @@ SET(INSTALL_SUPPORTFILESDIR_STANDALONE "support-files") # SET(INSTALL_MYSQLDATADIR_STANDALONE "data") SET(INSTALL_PLUGINTESTDIR_STANDALONE ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_STANDALONE ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_STANDALONE ${secure_file_priv_embedded_path}) + +# +# WIN layout +# +SET(INSTALL_BINDIR_WIN "bin") +SET(INSTALL_SBINDIR_WIN "bin") +SET(INSTALL_SCRIPTDIR_WIN "scripts") +# +SET(INSTALL_LIBDIR_WIN "lib") +SET(INSTALL_PLUGINDIR_WIN "lib/plugin") +# +SET(INSTALL_INCLUDEDIR_WIN "include") +# +SET(INSTALL_DOCDIR_WIN "docs") +SET(INSTALL_DOCREADMEDIR_WIN ".") +SET(INSTALL_MANDIR_WIN "man") +SET(INSTALL_INFODIR_WIN "docs") +# +SET(INSTALL_SHAREDIR_WIN "share") +SET(INSTALL_MYSQLSHAREDIR_WIN "share") +SET(INSTALL_MYSQLTESTDIR_WIN "mysql-test") +SET(INSTALL_SQLBENCHDIR_WIN ".") +SET(INSTALL_SUPPORTFILESDIR_WIN "support-files") +# +SET(INSTALL_MYSQLDATADIR_WIN "data") +SET(INSTALL_PLUGINTESTDIR_WIN ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_WIN ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_WIN ${secure_file_priv_embedded_path}) + +# +# FREEBSD layout +# +SET(INSTALL_BINDIR_FREEBSD "bin") +SET(INSTALL_SBINDIR_FREEBSD "bin") +SET(INSTALL_SCRIPTDIR_FREEBSD "scripts") +# +SET(INSTALL_LIBDIR_FREEBSD "lib") +SET(INSTALL_PLUGINDIR_FREEBSD "lib/plugin") +# 
+SET(INSTALL_INCLUDEDIR_FREEBSD "include") +# +SET(INSTALL_DOCDIR_FREEBSD "docs") +SET(INSTALL_DOCREADMEDIR_FREEBSD ".") +SET(INSTALL_MANDIR_FREEBSD "man") +SET(INSTALL_INFODIR_FREEBSD "docs") +# +SET(INSTALL_SHAREDIR_FREEBSD "share") +SET(INSTALL_MYSQLSHAREDIR_FREEBSD "share") +SET(INSTALL_MYSQLTESTDIR_FREEBSD "mysql-test") +SET(INSTALL_SQLBENCHDIR_FREEBSD ".") +SET(INSTALL_SUPPORTFILESDIR_FREEBSD "support-files") +# +SET(INSTALL_MYSQLDATADIR_FREEBSD "data") +SET(INSTALL_PLUGINTESTDIR_FREEBSD ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_FREEBSD ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_FREEBSD ${secure_file_priv_embedded_path}) + +# +# GLIBC layout +# +SET(INSTALL_BINDIR_GLIBC "bin") +SET(INSTALL_SBINDIR_GLIBC "bin") +SET(INSTALL_SCRIPTDIR_GLIBC "scripts") +# +SET(INSTALL_LIBDIR_GLIBC "lib") +SET(INSTALL_PLUGINDIR_GLIBC "lib/plugin") +# +SET(INSTALL_INCLUDEDIR_GLIBC "include") +# +SET(INSTALL_DOCDIR_GLIBC "docs") +SET(INSTALL_DOCREADMEDIR_GLIBC ".") +SET(INSTALL_MANDIR_GLIBC "man") +SET(INSTALL_INFODIR_GLIBC "docs") +# +SET(INSTALL_SHAREDIR_GLIBC "share") +SET(INSTALL_MYSQLSHAREDIR_GLIBC "share") +SET(INSTALL_MYSQLTESTDIR_GLIBC "mysql-test") +SET(INSTALL_SQLBENCHDIR_GLIBC ".") +SET(INSTALL_SUPPORTFILESDIR_GLIBC "support-files") +# +SET(INSTALL_MYSQLDATADIR_GLIBC "data") +SET(INSTALL_PLUGINTESTDIR_GLIBC ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_GLIBC ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_GLIBC ${secure_file_priv_embedded_path}) + +# +# OSX layout +# +SET(INSTALL_BINDIR_OSX "bin") +SET(INSTALL_SBINDIR_OSX "bin") +SET(INSTALL_SCRIPTDIR_OSX "scripts") +# +SET(INSTALL_LIBDIR_OSX "lib") +SET(INSTALL_PLUGINDIR_OSX "lib/plugin") +# +SET(INSTALL_INCLUDEDIR_OSX "include") +# +SET(INSTALL_DOCDIR_OSX "docs") +SET(INSTALL_DOCREADMEDIR_OSX ".") +SET(INSTALL_MANDIR_OSX "man") +SET(INSTALL_INFODIR_OSX "docs") +# +SET(INSTALL_SHAREDIR_OSX "share") +SET(INSTALL_MYSQLSHAREDIR_OSX "share") 
+SET(INSTALL_MYSQLTESTDIR_OSX "mysql-test") +SET(INSTALL_SQLBENCHDIR_OSX ".") +SET(INSTALL_SUPPORTFILESDIR_OSX "support-files") +# +SET(INSTALL_MYSQLDATADIR_OSX "data") +SET(INSTALL_PLUGINTESTDIR_OSX ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_OSX ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_OSX ${secure_file_priv_embedded_path}) + +# +# TARGZ layout +# +SET(INSTALL_BINDIR_TARGZ "bin") +SET(INSTALL_SBINDIR_TARGZ "bin") +SET(INSTALL_SCRIPTDIR_TARGZ "scripts") +# +SET(INSTALL_LIBDIR_TARGZ "lib") +SET(INSTALL_PLUGINDIR_TARGZ "lib/plugin") +# +SET(INSTALL_INCLUDEDIR_TARGZ "include") +# +SET(INSTALL_DOCDIR_TARGZ "docs") +SET(INSTALL_DOCREADMEDIR_TARGZ ".") +SET(INSTALL_MANDIR_TARGZ "man") +SET(INSTALL_INFODIR_TARGZ "docs") +# +SET(INSTALL_SHAREDIR_TARGZ "share") +SET(INSTALL_MYSQLSHAREDIR_TARGZ "share") +SET(INSTALL_MYSQLTESTDIR_TARGZ "mysql-test") +SET(INSTALL_SQLBENCHDIR_TARGZ ".") +SET(INSTALL_SUPPORTFILESDIR_TARGZ "support-files") +# +SET(INSTALL_MYSQLDATADIR_TARGZ "data") +SET(INSTALL_PLUGINTESTDIR_TARGZ ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_TARGZ ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_TARGZ ${secure_file_priv_embedded_path}) # # RPM layout @@ -169,6 +350,41 @@ SET(INSTALL_SUPPORTFILESDIR_RPM "share/mysql") # SET(INSTALL_MYSQLDATADIR_RPM "/var/lib/mysql") SET(INSTALL_PLUGINTESTDIR_RPM ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_RPM ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_RPM ${secure_file_priv_embedded_path}) + +# +# SLES layout +# +SET(INSTALL_BINDIR_SLES "bin") +SET(INSTALL_SBINDIR_SLES "sbin") +SET(INSTALL_SCRIPTDIR_SLES "bin") +# +IF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + SET(INSTALL_LIBDIR_SLES "lib64") + SET(INSTALL_PLUGINDIR_SLES "lib64/mysql/plugin") +ELSE() + SET(INSTALL_LIBDIR_SLES "lib") + SET(INSTALL_PLUGINDIR_SLES "lib/mysql/plugin") +ENDIF() +# +SET(INSTALL_INCLUDEDIR_SLES "include/mysql") +# +#SET(INSTALL_DOCDIR_SLES unset - installed directly 
by SLES) +#SET(INSTALL_DOCREADMEDIR_SLES unset - installed directly by SLES) +SET(INSTALL_INFODIR_SLES "share/info") +SET(INSTALL_MANDIR_SLES "share/man") +# +SET(INSTALL_SHAREDIR_SLES "share") +SET(INSTALL_MYSQLSHAREDIR_SLES "share/mysql") +SET(INSTALL_MYSQLTESTDIR_SLES "share/mysql-test") +SET(INSTALL_SQLBENCHDIR_SLES "") +SET(INSTALL_SUPPORTFILESDIR_SLES "share/mysql") +# +SET(INSTALL_MYSQLDATADIR_SLES "/var/lib/mysql") +SET(INSTALL_PLUGINTESTDIR_SLES ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_SLES ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_SLES ${secure_file_priv_embedded_path}) # # DEB layout @@ -193,8 +409,10 @@ SET(INSTALL_MYSQLTESTDIR_DEB "mysql-test") SET(INSTALL_SQLBENCHDIR_DEB ".") SET(INSTALL_SUPPORTFILESDIR_DEB "support-files") # -SET(INSTALL_MYSQLDATADIR_DEB "data") +SET(INSTALL_MYSQLDATADIR_DEB "/var/lib/mysql") SET(INSTALL_PLUGINTESTDIR_DEB ${plugin_tests}) +SET(INSTALL_SECURE_FILE_PRIVDIR_DEB ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_DEB ${secure_file_priv_embedded_path}) # # SVR4 layout @@ -221,7 +439,8 @@ SET(INSTALL_SUPPORTFILESDIR_SVR4 "support-files") # SET(INSTALL_MYSQLDATADIR_SVR4 "/var/lib/mysql") SET(INSTALL_PLUGINTESTDIR_SVR4 ${plugin_tests}) - +SET(INSTALL_SECURE_FILE_PRIVDIR_SVR4 ${secure_file_priv_path}) +SET(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR_SVR4 ${secure_file_priv_embedded_path}) # Clear cached variables if install layout was changed IF(OLD_INSTALL_LAYOUT) @@ -235,8 +454,29 @@ SET(OLD_INSTALL_LAYOUT ${INSTALL_LAYOUT} CACHE INTERNAL "") # will be defined as ${INSTALL_BINDIR_STANDALONE} by default if STANDALONE # layout is chosen) FOREACH(var BIN SBIN LIB MYSQLSHARE SHARE PLUGIN INCLUDE SCRIPT DOC MAN - INFO MYSQLTEST SQLBENCH DOCREADME SUPPORTFILES MYSQLDATA PLUGINTEST) + INFO MYSQLTEST SQLBENCH DOCREADME SUPPORTFILES MYSQLDATA PLUGINTEST + SECURE_FILE_PRIV SECURE_FILE_PRIV_EMBEDDED) SET(INSTALL_${var}DIR ${INSTALL_${var}DIR_${INSTALL_LAYOUT}} CACHE STRING "${var} 
installation directory" ${FORCE}) MARK_AS_ADVANCED(INSTALL_${var}DIR) ENDFOREACH() + +# +# Set DEFAULT_SECURE_FILE_PRIV_DIR +# This is used as default value for --secure-file-priv +# +IF(INSTALL_SECURE_FILE_PRIVDIR) + SET(DEFAULT_SECURE_FILE_PRIV_DIR "\"${INSTALL_SECURE_FILE_PRIVDIR}\"" + CACHE INTERNAL "default --secure-file-priv directory" FORCE) +ELSE() + SET(DEFAULT_SECURE_FILE_PRIV_DIR \"\" + CACHE INTERNAL "default --secure-file-priv directory" FORCE) +ENDIF() + +IF(INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR) + SET(DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR "\"${INSTALL_SECURE_FILE_PRIV_EMBEDDEDDIR}\"" + CACHE INTERNAL "default --secure-file-priv directory (for embedded library)" FORCE) +ELSE() + SET(DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR "NULL" + CACHE INTERNAL "default --secure-file-priv directory (for embedded library)" FORCE) +ENDIF() diff --git a/config.h.cmake b/config.h.cmake index 4548d0a221f..c7ed127379a 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -624,4 +624,8 @@ #cmakedefine SIZEOF_TIME_T @SIZEOF_TIME_T@ #cmakedefine TIME_T_UNSIGNED @TIME_T_UNSIGNED@ +/* For --secure-file-priv */ +#cmakedefine DEFAULT_SECURE_FILE_PRIV_DIR @DEFAULT_SECURE_FILE_PRIV_DIR@ +#cmakedefine DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR @DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR@ + #endif diff --git a/mysql-test/include/mtr_warnings.sql b/mysql-test/include/mtr_warnings.sql index 45acbc03b7e..0a3c3bc60b3 100644 --- a/mysql-test/include/mtr_warnings.sql +++ b/mysql-test/include/mtr_warnings.sql @@ -1,4 +1,4 @@ --- Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. +-- Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. 
-- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by @@ -204,6 +204,11 @@ INSERT INTO global_suppressions VALUES */ ("Found lock of type 6 that is write and read locked"), + /* + Warnings related to --secure-file-priv + */ + ("Insecure configuration for --secure-file-priv:*"), + ("THE_LAST_SUPPRESSION")|| diff --git a/mysql-test/include/mysqld--help.inc b/mysql-test/include/mysqld--help.inc index 380a7f6c8cf..7fa57abbe1e 100644 --- a/mysql-test/include/mysqld--help.inc +++ b/mysql-test/include/mysqld--help.inc @@ -18,7 +18,8 @@ perl; # their paths may vary: @skipvars=qw/basedir open-files-limit general-log-file log plugin-dir log-slow-queries pid-file slow-query-log-file - datadir slave-load-tmpdir tmpdir socket/; + datadir slave-load-tmpdir tmpdir socket + secure-file-priv/; # Plugins which may or may not be there: @plugins=qw/innodb ndb archive blackhole federated partition ndbcluster debug temp-pool ssl des-key-file diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 684d262f410..3eb70c1bdb9 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # -*- cperl -*- -# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -1823,6 +1823,7 @@ sub collect_mysqld_features { mtr_init_args(\$args); mtr_add_arg($args, "--no-defaults"); mtr_add_arg($args, "--datadir=%s", mixed_path($tmpdir)); + mtr_add_arg($args, "--secure-file-priv=\"\""); mtr_add_arg($args, "--lc-messages-dir=%s", $path_language); mtr_add_arg($args, "--skip-grant-tables"); mtr_add_arg($args, "--verbose"); @@ -3297,6 +3298,7 @@ sub mysql_install_db { mtr_add_arg($args, "--loose-skip-falcon"); mtr_add_arg($args, "--loose-skip-ndbcluster"); mtr_add_arg($args, "--tmpdir=%s", "$opt_vardir/tmp/"); + mtr_add_arg($args, "--secure-file-priv=%s", "$opt_vardir"); mtr_add_arg($args, "--core-file"); if ( $opt_debug ) diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index d527d6cb702..78dc9ab4d88 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -923,7 +923,6 @@ report-user (No default value) rpl-recovery-rank 0 safe-user-create FALSE secure-auth FALSE -secure-file-priv (No default value) server-id 0 show-slave-auth-info FALSE skip-grant-tables TRUE diff --git a/mysql-test/r/mysqld--help-win.result b/mysql-test/r/mysqld--help-win.result index 2ce9e763b14..1d56da7aa5e 100644 --- a/mysql-test/r/mysqld--help-win.result +++ b/mysql-test/r/mysqld--help-win.result @@ -931,7 +931,6 @@ report-user (No default value) rpl-recovery-rank 0 safe-user-create FALSE secure-auth FALSE -secure-file-priv (No default value) server-id 0 shared-memory FALSE shared-memory-base-name MYSQL diff --git a/mysql-test/suite/auth_sec/r/secure_file_priv_error.result b/mysql-test/suite/auth_sec/r/secure_file_priv_error.result new file mode 100644 index 00000000000..4bb4d87c5f0 --- /dev/null +++ b/mysql-test/suite/auth_sec/r/secure_file_priv_error.result @@ -0,0 +1,7 @@ 
+#----------------------------------------------------------------------- +# Setup +# Try to restart server with invalid value for --secure-file-priv +# Search for : Failed to access directory for --secure-file-priv. +# Restart completed. +# Restart +#----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/r/secure_file_priv_null.result b/mysql-test/suite/auth_sec/r/secure_file_priv_null.result new file mode 100644 index 00000000000..e2a5102c627 --- /dev/null +++ b/mysql-test/suite/auth_sec/r/secure_file_priv_null.result @@ -0,0 +1,21 @@ +#----------------------------------------------------------------------- +# Setup +#----------------------------------------------------------------------- +# Search for : --secure-file-priv is set to NULL. Operations +# related to importing and exporting data are +# disabled +show variables like 'secure_file_priv'; +Variable_name Value +secure_file_priv null +use test; +drop table if exists secure_file_priv_test_null; +create table secure_file_priv_test_null(c1 int); +insert into secure_file_priv_test_null values (1), (2), (3), (4); +select * from secure_file_priv_test_null into outfile 'blah'; +ERROR HY000: The MySQL server is running with the --secure-file-priv option so it cannot execute this statement +select * from secure_file_priv_test_null into outfile 'null/blah'; +ERROR HY000: The MySQL server is running with the --secure-file-priv option so it cannot execute this statement +drop table secure_file_priv_test_null; +#----------------------------------------------------------------------- +# Clean-up +#----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/r/secure_file_priv_warnings.result b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings.result new file mode 100644 index 00000000000..3b80cbe8d6f --- /dev/null +++ b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings.result @@ -0,0 +1,17 @@ 
+#----------------------------------------------------------------------- +# Setup +#----------------------------------------------------------------------- +# Search for : Insecure configuration for --secure-file-priv: Current +# value does not restrict location of generated files. +# Consider setting it to a valid, non-empty path. +SHOW VARIABLES LIKE 'secure_file_priv'; +Variable_name Value +secure_file_priv +#----------------------------------------------------------------------- +# Restart completed. +# Search for : Insecure configuration for --secure-file-priv: Plugin +# directory is accessible through --secure-file-priv. +# Consider choosing a different directory. +#----------------------------------------------------------------------- +# Clean-up +#----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_not_win.result b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_not_win.result new file mode 100644 index 00000000000..84e2f8ac3c2 --- /dev/null +++ b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_not_win.result @@ -0,0 +1,9 @@ +#----------------------------------------------------------------------- +# Search for : Insecure configuration for --secure-file-priv: Data +# directory is accessible through --secure-file-priv. +# Consider choosing a different directory. +#----------------------------------------------------------------------- +# Search for : Insecure configuration for --secure-file-priv: Location +# is accessible to all OS users. Consider choosing a +# different directory. 
+#----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_win.result b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_win.result new file mode 100644 index 00000000000..3beff6c4747 --- /dev/null +++ b/mysql-test/suite/auth_sec/r/secure_file_priv_warnings_win.result @@ -0,0 +1,8 @@ +#----------------------------------------------------------------------- +# Test 2 : Restarting mysqld with : +# --secure-file-priv=MYSQLTEST_VARDIR/mysqld.1/Data +# Restart completed. +# Search for : Insecure configuration for --secure-file-priv: Data +# directory is accessible through --secure-file-priv. +# Consider choosing a different directory. +#----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_error.test b/mysql-test/suite/auth_sec/t/secure_file_priv_error.test new file mode 100644 index 00000000000..9f8d185d8f5 --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_error.test @@ -0,0 +1,39 @@ +--source include/no_valgrind_without_big.inc +--source include/not_embedded.inc + +--echo #----------------------------------------------------------------------- +--echo # Setup +let restart_log= $MYSQLTEST_VARDIR/log/my_restart.err; +let SEARCH_FILE= $restart_log; +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; + +--echo # Try to restart server with invalid value for --secure-file-priv +--exec echo "wait" > $restart_file +--shutdown_server +--source include/wait_until_disconnected.inc + +--error 0,1 +--remove_file $restart_log +# Following should fail +--error 1 +--exec $MYSQLD_CMD --secure-file-priv=blahblahblah --loose-console > $restart_log 2>&1 + +--echo # Search for : Failed to access directory for --secure-file-priv. 
+let SEARCH_PATTERN= Failed to access directory for --secure-file-priv; +--source include/search_pattern_in_file.inc + +--remove_file $restart_log + +--source include/wait_until_disconnected.inc +# Dummy argument for restart +--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +--echo # Restart completed. + +--echo # Restart +--disable_warnings +--source include/force_restart.inc +--enable_warnings +--echo #----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_null-master.opt b/mysql-test/suite/auth_sec/t/secure_file_priv_null-master.opt new file mode 100644 index 00000000000..80d7f3cd469 --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_null-master.opt @@ -0,0 +1 @@ +--secure-file-priv=null diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_null.test b/mysql-test/suite/auth_sec/t/secure_file_priv_null.test new file mode 100644 index 00000000000..8d394a13589 --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_null.test @@ -0,0 +1,42 @@ +--source include/no_valgrind_without_big.inc +--source include/not_embedded.inc + +--echo #----------------------------------------------------------------------- +--echo # Setup +let server_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_FILE= $server_log; +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +--echo #----------------------------------------------------------------------- + +--echo # Search for : --secure-file-priv is set to NULL. Operations +--echo # related to importing and exporting data are +--echo # disabled +let SEARCH_PATTERN= --secure-file-priv is set to NULL. 
Operations related to importing and exporting data are disabled; +--source include/search_pattern_in_file.inc + +connect(test4_con,localhost,root,,,,,); +show variables like 'secure_file_priv'; + +use test; +--disable_warnings +drop table if exists secure_file_priv_test_null; +--enable_warnings +create table secure_file_priv_test_null(c1 int); +insert into secure_file_priv_test_null values (1), (2), (3), (4); +--error 1290 +select * from secure_file_priv_test_null into outfile 'blah'; +--error 1290 +select * from secure_file_priv_test_null into outfile 'null/blah'; +drop table secure_file_priv_test_null; + +connection default; +disconnect test4_con; + +--echo #----------------------------------------------------------------------- + +--echo # Clean-up +--disable_warnings +--source include/force_restart.inc +--enable_warnings + +--echo #----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_warnings-master.opt b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings-master.opt new file mode 100644 index 00000000000..22520f0aa99 --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings-master.opt @@ -0,0 +1 @@ +--secure-file-priv="" diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_warnings.test b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings.test new file mode 100644 index 00000000000..cc7a79d5b3c --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings.test @@ -0,0 +1,47 @@ +--source include/no_valgrind_without_big.inc +--source include/not_embedded.inc + +--echo #----------------------------------------------------------------------- +--echo # Setup +let server_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_FILE= $server_log; +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +let PLUGIN_DIR= $MYSQLTEST_VARDIR/tmp; +--echo #----------------------------------------------------------------------- + +--echo # Search for : Insecure 
configuration for --secure-file-priv: Current +--echo # value does not restrict location of generated files. +--echo # Consider setting it to a valid, non-empty path. +let SEARCH_PATTERN= Insecure configuration for --secure-file-priv: Current value does not restrict location of generated files. Consider setting it to a valid, non-empty path.; +--source include/search_pattern_in_file.inc + +# Must show empty string +SHOW VARIABLES LIKE 'secure_file_priv'; + +--echo #----------------------------------------------------------------------- + +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +--exec echo "wait" > $restart_file +--shutdown_server +--source include/wait_until_disconnected.inc +--remove_file $server_log +--exec echo "restart:--plugin-dir=$PLUGIN_DIR --secure-file-priv=$PLUGIN_DIR" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +--echo # Restart completed. + +--echo # Search for : Insecure configuration for --secure-file-priv: Plugin +--echo # directory is accessible through --secure-file-priv. +--echo # Consider choosing a different directory. +let SEARCH_PATTERN= Insecure configuration for --secure-file-priv: Plugin directory is accessible through --secure-file-priv. 
Consider choosing a different directory.; +--source include/search_pattern_in_file.inc + +--echo #----------------------------------------------------------------------- + +--echo # Clean-up +--disable_warnings +--source include/force_restart.inc +--enable_warnings + +--echo #----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_not_win.test b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_not_win.test new file mode 100644 index 00000000000..ec027d4a743 --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_not_win.test @@ -0,0 +1,24 @@ +--source include/no_valgrind_without_big.inc +--source include/not_windows.inc +--source include/not_embedded.inc + +let server_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_FILE= $server_log; + +--echo #----------------------------------------------------------------------- + +--echo # Search for : Insecure configuration for --secure-file-priv: Data +--echo # directory is accessible through --secure-file-priv. +--echo # Consider choosing a different directory. +let SEARCH_PATTERN= Insecure configuration for --secure-file-priv: Data directory is accessible through --secure-file-priv. Consider choosing a different directory.; +--source include/search_pattern_in_file.inc + +--echo #----------------------------------------------------------------------- + +--echo # Search for : Insecure configuration for --secure-file-priv: Location +--echo # is accessible to all OS users. Consider choosing a +--echo # different directory. +let SEARCH_PATTERN= Insecure configuration for --secure-file-priv: Location is accessible to all OS users. 
Consider choosing a different directory.; +--source include/search_pattern_in_file.inc + +--echo #----------------------------------------------------------------------- diff --git a/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_win.test b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_win.test new file mode 100644 index 00000000000..bb175fb40ea --- /dev/null +++ b/mysql-test/suite/auth_sec/t/secure_file_priv_warnings_win.test @@ -0,0 +1,35 @@ +--source include/no_valgrind_without_big.inc +--source include/windows.inc +--source include/not_embedded.inc + +let server_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_FILE= $server_log; + +--echo #----------------------------------------------------------------------- + +--echo # Test 2 : Restarting mysqld with : +--echo # --secure-file-priv=MYSQLTEST_VARDIR/mysqld.1/Data + +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +--exec echo "wait" > $restart_file +--shutdown_server +--source include/wait_until_disconnected.inc +--error 0,1 +--remove_file $server_log +--exec echo "restart: --secure-file-priv=$MYSQLTEST_VARDIR/mysqld.1/Data" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +--echo # Restart completed. + +--echo # Search for : Insecure configuration for --secure-file-priv: Data +--echo # directory is accessible through --secure-file-priv. +--echo # Consider choosing a different directory. +let SEARCH_PATTERN= Insecure configuration for --secure-file-priv: Data directory is accessible through --secure-file-priv. 
Consider choosing a different directory.; +--source include/search_pattern_in_file.inc + +--disable_warnings +--source include/force_restart.inc +--enable_warnings + +--echo #----------------------------------------------------------------------- diff --git a/packaging/rpm-oel/mysql-systemd-start b/packaging/rpm-oel/mysql-systemd-start index fab7b3627b3..231a76087ac 100644 --- a/packaging/rpm-oel/mysql-systemd-start +++ b/packaging/rpm-oel/mysql-systemd-start @@ -30,6 +30,12 @@ install_db () { if [ -x /usr/sbin/restorecon ]; then /usr/sbin/restorecon "$datadir" /usr/sbin/restorecon $log + for dir in /var/lib/mysql-files ; do + if [ -x /usr/sbin/semanage -a -d /var/lib/mysql -a -d $dir ] ; then + /usr/sbin/semanage fcontext -a -e /var/lib/mysql $dir >/dev/null 2>&1 + /sbin/restorecon $dir + fi + done fi # If special mysql dir is in place, skip db install diff --git a/packaging/rpm-oel/mysql.init b/packaging/rpm-oel/mysql.init index aaea498d153..75ae672801b 100644 --- a/packaging/rpm-oel/mysql.init +++ b/packaging/rpm-oel/mysql.init @@ -82,7 +82,15 @@ start(){ fi chown mysql:mysql "$datadir" chmod 0755 "$datadir" - [ -x /sbin/restorecon ] && /sbin/restorecon "$datadir" + if [ -x /sbin/restorecon ]; then + /sbin/restorecon "$datadir" + for dir in /var/lib/mysql-files ; do + if [ -x /usr/sbin/semanage -a -d /var/lib/mysql -a -d $dir ] ; then + /usr/sbin/semanage fcontext -a -e /var/lib/mysql $dir >/dev/null 2>&1 + /sbin/restorecon $dir + fi + done + fi # Now create the database action $"Initializing MySQL database: " /usr/bin/mysql_install_db --rpm --datadir="$datadir" --user=mysql ret=$? 
diff --git a/packaging/rpm-oel/mysql.spec.in b/packaging/rpm-oel/mysql.spec.in index 409c325b675..7ef294ffa84 100644 --- a/packaging/rpm-oel/mysql.spec.in +++ b/packaging/rpm-oel/mysql.spec.in @@ -560,6 +560,7 @@ MBD=$RPM_BUILD_DIR/%{src_dir} install -d -m 0755 %{buildroot}%{_datadir}/mysql/SELinux/RHEL4 install -d -m 0755 %{buildroot}/var/lib/mysql install -d -m 0755 %{buildroot}/var/run/mysqld +install -d -m 0750 %{buildroot}/var/lib/mysql-files # Install all binaries cd $MBD/release @@ -790,6 +791,7 @@ fi %attr(644, root, root) %config(noreplace,missingok) %{_sysconfdir}/logrotate.d/mysql %dir %attr(755, mysql, mysql) /var/lib/mysql %dir %attr(755, mysql, mysql) /var/run/mysqld +%dir %attr(750, mysql, mysql) /var/lib/mysql-files %files common %defattr(-, root, root, -) @@ -916,6 +918,9 @@ fi %endif %changelog +* Mon Sep 26 2016 Balasubramanian Kandasamy - 5.5.53-1 +- Include mysql-files directory + * Tue Jul 05 2016 Balasubramanian Kandasamy - 5.5.51-1 - Remove mysql_config from client subpackage diff --git a/packaging/rpm-sles/mysql.spec.in b/packaging/rpm-sles/mysql.spec.in index a11dfff7b70..6652cdcccb6 100644 --- a/packaging/rpm-sles/mysql.spec.in +++ b/packaging/rpm-sles/mysql.spec.in @@ -425,6 +425,7 @@ MBD=$RPM_BUILD_DIR/%{src_dir} install -d -m 0755 %{buildroot}/var/lib/mysql install -d -m 0755 %{buildroot}/var/run/mysql install -d -m 0750 %{buildroot}/var/log/mysql +install -d -m 0750 %{buildroot}/var/lib/mysql-files # Install all binaries cd $MBD/release @@ -638,6 +639,7 @@ fi %dir %attr(755, mysql, mysql) /var/lib/mysql %dir %attr(755, mysql, mysql) /var/run/mysql %dir %attr(750, mysql, mysql) /var/log/mysql +%dir %attr(750, mysql, mysql) /var/lib/mysql-files %files common %defattr(-, root, root, -) @@ -783,6 +785,9 @@ fi %attr(755, root, root) %{_libdir}/mysql/libmysqld.so %changelog +* Mon Sep 26 2016 Balasubramanian Kandasamy - 5.5.53-1 +- Include mysql-files directory + * Tue Sep 29 2015 Balasubramanian Kandasamy - 5.5.47-1 - Added conflicts to 
mysql-connector-c-shared dependencies diff --git a/packaging/solaris/postinstall-solaris.sh b/packaging/solaris/postinstall-solaris.sh index b024d94f158..a31e151e1bb 100644 --- a/packaging/solaris/postinstall-solaris.sh +++ b/packaging/solaris/postinstall-solaris.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,6 +26,7 @@ mygroup=mysql myuser=mysql mydatadir=/var/lib/mysql basedir=@@basedir@@ +mysecurefiledir=/var/lib/mysql-files if [ -n "$BASEDIR" ] ; then basedir="$BASEDIR" @@ -58,6 +59,11 @@ fi chown -R $myuser:$mygroup $mydatadir +# Create securefile directory +[ -d "$mysecurefiledir" ] || mkdir -p -m 770 "$mysecurefiledir" || exit 1 +chown -R $myuser:$mygroup $mysecurefiledir + + # Solaris patch 119255 (somewhere around revision 42) changes the behaviour # of pkgadd to set TMPDIR internally to a root-owned install directory. 
This # has the unfortunate side effect of breaking running mysql_install_db with diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e979ea1b731..2429db0774b 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -570,6 +570,7 @@ uint mysql_real_data_home_len, mysql_data_home_len= 1; uint reg_ext_length; const key_map key_map_empty(0); key_map key_map_full(0); // Will be initialized later +char secure_file_real_path[FN_REFLEN]; DATE_TIME_FORMAT global_date_format, global_datetime_format, global_time_format; Time_zone *default_tz; @@ -7598,9 +7599,9 @@ bool is_secure_file_path(char *path) char buff1[FN_REFLEN], buff2[FN_REFLEN]; size_t opt_secure_file_priv_len; /* - All paths are secure if opt_secure_file_path is 0 + All paths are secure if opt_secure_file_priv is 0 */ - if (!opt_secure_file_priv) + if (!opt_secure_file_priv[0]) return TRUE; opt_secure_file_priv_len= strlen(opt_secure_file_priv); @@ -7608,6 +7609,9 @@ bool is_secure_file_path(char *path) if (strlen(path) >= FN_REFLEN) return FALSE; + if (!my_strcasecmp(system_charset_info, opt_secure_file_priv, "NULL")) + return FALSE; + if (my_realpath(buff1, path, 0)) { /* @@ -7640,9 +7644,184 @@ bool is_secure_file_path(char *path) } +/** + check_secure_file_priv_path : Checks path specified through + --secure-file-priv and raises warning in following cases: + 1. If path is empty string or NULL and mysqld is not running + with --bootstrap mode. + 2. If path can access data directory + 3. If path points to a directory which is accessible by + all OS users (non-Windows build only) + + It throws error in following cases: + + 1. If path normalization fails + 2. If it can not get stats of the directory + + @params NONE + + Assumptions : + 1. Data directory path has been normalized + 2. opt_secure_file_priv has been normalized unless it is set + to "NULL". + + @returns Status of validation + @retval true : Validation is successful with/without warnings + @retval false : Validation failed. Error is raised. 
+*/ + +bool check_secure_file_priv_path() +{ + char datadir_buffer[FN_REFLEN+1]={0}; + char plugindir_buffer[FN_REFLEN+1]={0}; + char whichdir[20]= {0}; + size_t opt_plugindir_len= 0; + size_t opt_datadir_len= 0; + size_t opt_secure_file_priv_len= 0; + bool warn= false; + bool case_insensitive_fs; +#ifndef _WIN32 + MY_STAT dir_stat; +#endif + + if (!opt_secure_file_priv[0]) + { + if (opt_bootstrap) + { + /* + Do not impose --secure-file-priv restriction + in --bootstrap mode + */ + sql_print_information("Ignoring --secure-file-priv value as server is " + "running with --bootstrap."); + } + else + { + sql_print_warning("Insecure configuration for --secure-file-priv: " + "Current value does not restrict location of generated " + "files. Consider setting it to a valid, " + "non-empty path."); + } + return true; + } + + /* + Setting --secure-file-priv to NULL would disable + reading/writing from/to file + */ + if(!my_strcasecmp(system_charset_info, opt_secure_file_priv, "NULL")) + { + sql_print_information("--secure-file-priv is set to NULL. " + "Operations related to importing and exporting " + "data are disabled"); + return true; + } + + /* + Check if --secure-file-priv can access data directory + */ + opt_secure_file_priv_len= strlen(opt_secure_file_priv); + + /* + Adds dir seperator at the end. + This is required in subsequent comparison + */ + convert_dirname(datadir_buffer, mysql_unpacked_real_data_home, NullS); + opt_datadir_len= strlen(datadir_buffer); + + case_insensitive_fs= + (test_if_case_insensitive(datadir_buffer) == 1); + + if (!case_insensitive_fs) + { + if (!strncmp(datadir_buffer, opt_secure_file_priv, + opt_datadir_len < opt_secure_file_priv_len ? 
+ opt_datadir_len : opt_secure_file_priv_len)) + { + warn= true; + strcpy(whichdir, "Data directory"); + } + } + else + { + if (!files_charset_info->coll->strnncoll(files_charset_info, + (uchar *) datadir_buffer, + opt_datadir_len, + (uchar *) opt_secure_file_priv, + opt_secure_file_priv_len, + TRUE)) + { + warn= true; + strcpy(whichdir, "Data directory"); + } + } + + /* + Don't bother comparing --secure-file-priv with --plugin-dir + if we already have a match against --datadir or + --plugin-dir is not pointing to a valid directory. + */ + if (!warn && !my_realpath(plugindir_buffer, opt_plugin_dir, 0)) + { + convert_dirname(plugindir_buffer, plugindir_buffer, NullS); + opt_plugindir_len= strlen(plugindir_buffer); + + if (!case_insensitive_fs) + { + if (!strncmp(plugindir_buffer, opt_secure_file_priv, + opt_plugindir_len < opt_secure_file_priv_len ? + opt_plugindir_len : opt_secure_file_priv_len)) + { + warn= true; + strcpy(whichdir, "Plugin directory"); + } + } + else + { + if (!files_charset_info->coll->strnncoll(files_charset_info, + (uchar *) plugindir_buffer, + opt_plugindir_len, + (uchar *) opt_secure_file_priv, + opt_secure_file_priv_len, + TRUE)) + { + warn= true; + strcpy(whichdir, "Plugin directory"); + } + } + } + + + if (warn) + sql_print_warning("Insecure configuration for --secure-file-priv: " + "%s is accessible through " + "--secure-file-priv. Consider choosing a different " + "directory.", whichdir); + +#ifndef _WIN32 + /* + Check for --secure-file-priv directory's permission + */ + if (!(my_stat(opt_secure_file_priv, &dir_stat, MYF(0)))) + { + sql_print_error("Failed to get stat for directory pointed out " + "by --secure-file-priv"); + return false; + } + + if (dir_stat.st_mode & S_IRWXO) + sql_print_warning("Insecure configuration for --secure-file-priv: " + "Location is accessible to all OS users. 
" + "Consider choosing a different directory."); +#endif + return true; +} + + static int fix_paths(void) { char buff[FN_REFLEN],*pos; + bool secure_file_priv_nonempty= false; convert_dirname(mysql_home,mysql_home,NullS); /* Resolve symlinks to allow 'mysql_home' to be a relative symlink */ my_realpath(mysql_home,mysql_home,MYF(0)); @@ -7700,29 +7879,56 @@ static int fix_paths(void) Convert the secure-file-priv option to system format, allowing a quick strcmp to check if read or write is in an allowed dir */ - if (opt_secure_file_priv) + if (opt_bootstrap) + opt_secure_file_priv= EMPTY_STR.str; + secure_file_priv_nonempty= opt_secure_file_priv[0] ? true : false; + + if (secure_file_priv_nonempty && strlen(opt_secure_file_priv) > FN_REFLEN) { - if (*opt_secure_file_priv == 0) - { - my_free(opt_secure_file_priv); - opt_secure_file_priv= 0; - } - else - { - if (strlen(opt_secure_file_priv) >= FN_REFLEN) - opt_secure_file_priv[FN_REFLEN-1]= '\0'; - if (my_realpath(buff, opt_secure_file_priv, 0)) - { - sql_print_warning("Failed to normalize the argument for --secure-file-priv."); - return 1; - } - char *secure_file_real_path= (char *)my_malloc(FN_REFLEN, MYF(MY_FAE)); - convert_dirname(secure_file_real_path, buff, NullS); - my_free(opt_secure_file_priv); - opt_secure_file_priv= secure_file_real_path; - } + sql_print_warning("Value for --secure-file-priv is longer than maximum " + "limit of %d", FN_REFLEN-1); + return 1; } - + + memset(buff, 0, sizeof(buff)); + if (secure_file_priv_nonempty && + my_strcasecmp(system_charset_info, opt_secure_file_priv, "NULL")) + { + int retval= my_realpath(buff, opt_secure_file_priv, MYF(MY_WME)); + if (!retval) + { + convert_dirname(secure_file_real_path, buff, NullS); +#ifdef WIN32 + MY_DIR *dir= my_dir(secure_file_real_path, MYF(MY_DONT_SORT+MY_WME)); + if (!dir) + { + retval= 1; + } + else + { + my_dirend(dir); + } +#endif + } + + if (retval) + { + char err_buffer[FN_REFLEN]; + my_snprintf(err_buffer, FN_REFLEN-1, + "Failed to access 
directory for --secure-file-priv." + " Please make sure that directory exists and is " + "accessible by MySQL Server. Supplied value : %s", + opt_secure_file_priv); + err_buffer[FN_REFLEN-1]='\0'; + sql_print_error("%s", err_buffer); + return 1; + } + opt_secure_file_priv= secure_file_real_path; + } + + if (!check_secure_file_priv_path()) + return 1; + return 0; } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0696021cfc0..d9fda85d8f6 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -68,6 +68,8 @@ char internal_table_name[2]= "*"; char empty_c_string[1]= {0}; /* used for not defined db */ +LEX_STRING EMPTY_STR= { (char *) "", 0 }; + const char * const THD::DEFAULT_WHERE= "field list"; diff --git a/sql/sql_class.h b/sql/sql_class.h index dcc7458ee50..aa6745e4564 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -105,6 +105,7 @@ enum enum_filetype { FILETYPE_CSV, FILETYPE_XML }; extern char internal_table_name[2]; extern char empty_c_string[1]; +extern LEX_STRING EMPTY_STR; extern MYSQL_PLUGIN_IMPORT const char **errmesg; extern bool volatile shutdown_in_progress; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index d08cb4f8ca8..6fd728d638d 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1941,8 +1941,12 @@ static Sys_var_charptr Sys_secure_file_priv( "secure_file_priv", "Limit LOAD DATA, SELECT ... 
OUTFILE, and LOAD_FILE() to files " "within specified directory", - PREALLOCATED READ_ONLY GLOBAL_VAR(opt_secure_file_priv), - CMD_LINE(REQUIRED_ARG), IN_FS_CHARSET, DEFAULT(0)); + READ_ONLY GLOBAL_VAR(opt_secure_file_priv), +#ifndef EMBEDDED_LIBRARY + CMD_LINE(REQUIRED_ARG), IN_FS_CHARSET, DEFAULT(DEFAULT_SECURE_FILE_PRIV_DIR)); +#else + CMD_LINE(REQUIRED_ARG), IN_FS_CHARSET, DEFAULT(DEFAULT_SECURE_FILE_PRIV_EMBEDDED_DIR)); +#endif static bool fix_server_id(sys_var *self, THD *thd, enum_var_type type) { diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index 5af4783f919..211ed4f3888 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -562,6 +562,7 @@ install -d $RBR%{_includedir} install -d $RBR%{_libdir} install -d $RBR%{_mandir} install -d $RBR%{_sbindir} +install -d $RBR/var/lib/mysql-files mkdir -p $RBR%{_sysconfdir}/my.cnf.d @@ -1141,6 +1142,7 @@ echo "=====" >> $STATUS_HISTORY %attr(755, root, root) %{_sysconfdir}/init.d/mysql %attr(755, root, root) %{_datadir}/mysql/ +%dir %attr(750, mysql, mysql) /var/lib/mysql-files # ---------------------------------------------------------------------------- %files -n MySQL-client%{product_suffix} @@ -1226,6 +1228,9 @@ echo "=====" >> $STATUS_HISTORY # merging BK trees) ############################################################################## %changelog +* Mon Sep 26 2016 Balasubramanian Kandasamy +- Include mysql-files directory + * Wed Jul 02 2014 Bjorn Munch - Disable dtrace unconditionally, breaks after we install Oracle dtrace From 5c6169fb309981b564a17bee31b367a18866d674 Mon Sep 17 00:00:00 2001 From: Robert Golebiowski Date: Tue, 27 Sep 2016 
11:17:38 +0200 Subject: [PATCH 57/96] Bug #24740291: YASSL UPDATE TO 2.4.2 --- extra/yassl/README | 18 +++ extra/yassl/certs/dsa-cert.pem | 38 ++--- extra/yassl/include/openssl/ssl.h | 2 +- extra/yassl/src/ssl.cpp | 60 +++++--- extra/yassl/taocrypt/include/aes.hpp | 58 ++++++++ extra/yassl/taocrypt/include/integer.hpp | 3 + extra/yassl/taocrypt/src/aes.cpp | 172 ++++++++++++++--------- extra/yassl/taocrypt/src/asn.cpp | 24 ++-- extra/yassl/taocrypt/src/dsa.cpp | 16 ++- extra/yassl/taocrypt/test/test.cpp | 3 + extra/yassl/testsuite/test.hpp | 2 +- 11 files changed, 274 insertions(+), 122 deletions(-) diff --git a/extra/yassl/README b/extra/yassl/README index b5eb88824fb..a3d4f60f561 100644 --- a/extra/yassl/README +++ b/extra/yassl/README @@ -12,6 +12,24 @@ before calling SSL_new(); *** end Note *** +yaSSL Release notes, version 2.4.2 (9/22/2016) + This release of yaSSL fixes a medium security vulnerability. A fix for + potential AES side channel leaks is included that a local user monitoring + the same CPU core cache could exploit. VM users, hyper-threading users, + and users where potential attackers have access to the CPU cache will need + to update if they utilize AES. + + DSA padding fixes for unusual sizes is included as well. Users with DSA + certficiates should update. + +yaSSL Release notes, version 2.4.0 (5/20/2016) + This release of yaSSL fixes the OpenSSL compatibility function + SSL_CTX_load_verify_locations() when using the path directory to allow + unlimited path sizes. Minor Windows build fixes are included. + No high level security fixes in this version but we always recommend + updating. 
+ + yaSSL Release notes, version 2.3.9b (2/03/2016) This release of yaSSL fixes the OpenSSL compatibility function X509_NAME_get_index_by_NID() to use the actual index of the common name diff --git a/extra/yassl/certs/dsa-cert.pem b/extra/yassl/certs/dsa-cert.pem index 10d533edc88..10794cbee73 100644 --- a/extra/yassl/certs/dsa-cert.pem +++ b/extra/yassl/certs/dsa-cert.pem @@ -1,22 +1,22 @@ -----BEGIN CERTIFICATE----- -MIIDqzCCA2ugAwIBAgIJAMGqrgDU6DyhMAkGByqGSM44BAMwgY4xCzAJBgNVBAYT +MIIDrzCCA2+gAwIBAgIJAK1zRM7YFcNjMAkGByqGSM44BAMwgZAxCzAJBgNVBAYT AlVTMQ8wDQYDVQQIDAZPcmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQK -DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wu -Y29tMR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tMB4XDTEzMDQyMjIw -MDk0NFoXDTE2MDExNzIwMDk0NFowgY4xCzAJBgNVBAYTAlVTMQ8wDQYDVQQIDAZP -cmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQKDAd3b2xmU1NMMRAwDgYD -VQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wuY29tMR8wHQYJKoZIhvcN -AQkBFhBpbmZvQHdvbGZzc2wuY29tMIIBuDCCASwGByqGSM44BAEwggEfAoGBAL1R -7koy4IrH6sbh6nDEUUPPKgfhxxLCWCVexF2+qzANEr+hC9M002haJXFOfeS9DyoO -WFbL0qMZOuqv+22CaHnoUWl7q3PjJOAI3JH0P54ZyUPuU1909RzgTdIDp5+ikbr7 -KYjnltL73FQVMbjTZQKthIpPn3MjYcF+4jp2W2zFAhUAkcntYND6MGf+eYzIJDN2 -L7SonHUCgYEAklpxErfqznIZjVvqqHFaq+mgAL5J8QrKVmdhYZh/Y8z4jCjoCA8o -TDoFKxf7s2ZzgaPKvglaEKiYqLqic9qY78DYJswzQMLFvjsF4sFZ+pYCBdWPQI4N -PgxCiznK6Ce+JH9ikSBvMvG+tevjr2UpawDIHX3+AWYaZBZwKADAaboDgYUAAoGB -AJ3LY89yHyvQ/TsQ6zlYbovjbk/ogndsMqPdNUvL4RuPTgJP/caaDDa0XJ7ak6A7 -TJ+QheLNwOXoZPYJC4EGFSDAXpYniGhbWIrVTCGe6lmZDfnx40WXS0kk3m/DHaC0 -3ElLAiybxVGxyqoUfbT3Zv1JwftWMuiqHH5uADhdXuXVo1AwTjAdBgNVHQ4EFgQU -IJjk416o4v8qpH9LBtXlR9v8gccwHwYDVR0jBBgwFoAUIJjk416o4v8qpH9LBtXl -R9v8gccwDAYDVR0TBAUwAwEB/zAJBgcqhkjOOAQDAy8AMCwCFCjGKIdOSV12LcTu -k08owGM6YkO1AhQe+K173VuaO/OsDNsxZlKpyH8+1g== +DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRgwFgYDVQQDDA93d3cud29sZnNz +bC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb20wHhcNMTYwOTIy +MjEyMzA0WhcNMjIwMzE1MjEyMzA0WjCBkDELMAkGA1UEBhMCVVMxDzANBgNVBAgM 
+Bk9yZWdvbjERMA8GA1UEBwwIUG9ydGxhbmQxEDAOBgNVBAoMB3dvbGZTU0wxEDAO +BgNVBAsMB3Rlc3RpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqG +SIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCAbgwggEsBgcqhkjOOAQBMIIBHwKB +gQC9Ue5KMuCKx+rG4epwxFFDzyoH4ccSwlglXsRdvqswDRK/oQvTNNNoWiVxTn3k +vQ8qDlhWy9KjGTrqr/ttgmh56FFpe6tz4yTgCNyR9D+eGclD7lNfdPUc4E3SA6ef +opG6+ymI55bS+9xUFTG402UCrYSKT59zI2HBfuI6dltsxQIVAJHJ7WDQ+jBn/nmM +yCQzdi+0qJx1AoGBAJJacRK36s5yGY1b6qhxWqvpoAC+SfEKylZnYWGYf2PM+Iwo +6AgPKEw6BSsX+7Nmc4Gjyr4JWhComKi6onPamO/A2CbMM0DCxb47BeLBWfqWAgXV +j0CODT4MQos5yugnviR/YpEgbzLxvrXr469lKWsAyB19/gFmGmQWcCgAwGm6A4GF +AAKBgQCdy2PPch8r0P07EOs5WG6L425P6IJ3bDKj3TVLy+Ebj04CT/3Gmgw2tFye +2pOgO0yfkIXizcDl6GT2CQuBBhUgwF6WJ4hoW1iK1UwhnupZmQ358eNFl0tJJN5v +wx2gtNxJSwIsm8VRscqqFH2092b9ScH7VjLoqhx+bgA4XV7l1aNQME4wHQYDVR0O +BBYEFCCY5ONeqOL/KqR/SwbV5Ufb/IHHMB8GA1UdIwQYMBaAFCCY5ONeqOL/KqR/ +SwbV5Ufb/IHHMAwGA1UdEwQFMAMBAf8wCQYHKoZIzjgEAwMvADAsAhQRYSCVN/Ge +agV3mffU3qNZ92fI0QIUPH7Jp+iASI7U1ocaYDc10qXGaGY= -----END CERTIFICATE----- diff --git a/extra/yassl/include/openssl/ssl.h b/extra/yassl/include/openssl/ssl.h index 83daf3cc81f..0609dfc0592 100644 --- a/extra/yassl/include/openssl/ssl.h +++ b/extra/yassl/include/openssl/ssl.h @@ -35,7 +35,7 @@ #include "rsa.h" -#define YASSL_VERSION "2.3.9b" +#define YASSL_VERSION "2.4.2" #if defined(__cplusplus) diff --git a/extra/yassl/src/ssl.cpp b/extra/yassl/src/ssl.cpp index cde32df4f43..1925e2f7592 100644 --- a/extra/yassl/src/ssl.cpp +++ b/extra/yassl/src/ssl.cpp @@ -161,7 +161,7 @@ int read_file(SSL_CTX* ctx, const char* file, int format, CertType type) TaoCrypt::DSA_PrivateKey dsaKey; dsaKey.Initialize(dsaSource); - if (rsaSource.GetError().What()) { + if (dsaSource.GetError().What()) { // neither worked ret = SSL_FAILURE; } @@ -784,40 +784,67 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file, WIN32_FIND_DATA FindFileData; HANDLE hFind; - char name[MAX_PATH + 1]; // directory specification - strncpy(name, path, MAX_PATH - 3); - strncat(name, "\\*", 
3); + const int DELIMITER_SZ = 2; + const int DELIMITER_STAR_SZ = 3; + int pathSz = (int)strlen(path); + int nameSz = pathSz + DELIMITER_STAR_SZ + 1; // plus 1 for terminator + char* name = NEW_YS char[nameSz]; // directory specification + memset(name, 0, nameSz); + strncpy(name, path, nameSz - DELIMITER_STAR_SZ - 1); + strncat(name, "\\*", DELIMITER_STAR_SZ); hFind = FindFirstFile(name, &FindFileData); - if (hFind == INVALID_HANDLE_VALUE) return SSL_BAD_PATH; + if (hFind == INVALID_HANDLE_VALUE) { + ysArrayDelete(name); + return SSL_BAD_PATH; + } do { - if (FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) { - strncpy(name, path, MAX_PATH - 2 - HALF_PATH); - strncat(name, "\\", 2); - strncat(name, FindFileData.cFileName, HALF_PATH); + if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + int curSz = (int)strlen(FindFileData.cFileName); + if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) { + ysArrayDelete(name); + // plus 1 for terminator + nameSz = pathSz + curSz + DELIMITER_SZ + 1; + name = NEW_YS char[nameSz]; + } + memset(name, 0, nameSz); + strncpy(name, path, nameSz - curSz - DELIMITER_SZ - 1); + strncat(name, "\\", DELIMITER_SZ); + strncat(name, FindFileData.cFileName, + nameSz - pathSz - DELIMITER_SZ - 1); ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA); } } while (ret == SSL_SUCCESS && FindNextFile(hFind, &FindFileData)); + ysArrayDelete(name); FindClose(hFind); #else // _WIN32 - - const int MAX_PATH = 260; - DIR* dir = opendir(path); if (!dir) return SSL_BAD_PATH; struct dirent* entry; struct stat buf; - char name[MAX_PATH + 1]; + const int DELIMITER_SZ = 1; + int pathSz = (int)strlen(path); + int nameSz = pathSz + DELIMITER_SZ + 1; //plus 1 for null terminator + char* name = NEW_YS char[nameSz]; // directory specification while (ret == SSL_SUCCESS && (entry = readdir(dir))) { - strncpy(name, path, MAX_PATH - 1 - HALF_PATH); - strncat(name, "/", 1); - strncat(name, entry->d_name, HALF_PATH); + int curSz = (int)strlen(entry->d_name); 
+ if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) { + ysArrayDelete(name); + nameSz = pathSz + DELIMITER_SZ + curSz + 1; + name = NEW_YS char[nameSz]; + } + memset(name, 0, nameSz); + strncpy(name, path, nameSz - curSz - 1); + strncat(name, "/", DELIMITER_SZ); + strncat(name, entry->d_name, nameSz - pathSz - DELIMITER_SZ - 1); + if (stat(name, &buf) < 0) { + ysArrayDelete(name); closedir(dir); return SSL_BAD_STAT; } @@ -826,6 +853,7 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file, ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA); } + ysArrayDelete(name); closedir(dir); #endif diff --git a/extra/yassl/taocrypt/include/aes.hpp b/extra/yassl/taocrypt/include/aes.hpp index 01763033156..bccf6e73fc7 100644 --- a/extra/yassl/taocrypt/include/aes.hpp +++ b/extra/yassl/taocrypt/include/aes.hpp @@ -60,6 +60,7 @@ private: static const word32 Te[5][256]; static const word32 Td[5][256]; + static const byte CTd4[256]; static const word32* Te0; static const word32* Te1; @@ -80,11 +81,68 @@ private: void ProcessAndXorBlock(const byte*, const byte*, byte*) const; + word32 PreFetchTe() const; + word32 PreFetchTd() const; + word32 PreFetchCTd4() const; + AES(const AES&); // hide copy AES& operator=(const AES&); // and assign }; +#if defined(__x86_64__) || defined(_M_X64) || \ + (defined(__ILP32__) && (__ILP32__ >= 1)) + #define TC_CACHE_LINE_SZ 64 +#else + /* default cache line size */ + #define TC_CACHE_LINE_SZ 32 +#endif + +inline word32 AES::PreFetchTe() const +{ + word32 x = 0; + + /* 4 tables of 256 entries */ + for (int i = 0; i < 4; i++) { + /* each entry is 4 bytes */ + for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) { + x &= Te[i][j]; + } + } + + return x; +} + + +inline word32 AES::PreFetchTd() const +{ + word32 x = 0; + + /* 4 tables of 256 entries */ + for (int i = 0; i < 4; i++) { + /* each entry is 4 bytes */ + for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) { + x &= Td[i][j]; + } + } + + return x; +} + + +inline word32 AES::PreFetchCTd4() const 
+{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += TC_CACHE_LINE_SZ) { + x &= CTd4[i]; + } + + return x; +} + + typedef BlockCipher AES_ECB_Encryption; typedef BlockCipher AES_ECB_Decryption; diff --git a/extra/yassl/taocrypt/include/integer.hpp b/extra/yassl/taocrypt/include/integer.hpp index 75a3ee3d3df..05fe189fd58 100644 --- a/extra/yassl/taocrypt/include/integer.hpp +++ b/extra/yassl/taocrypt/include/integer.hpp @@ -119,6 +119,9 @@ namespace TaoCrypt { +#ifdef _WIN32 + #undef max // avoid name clash +#endif // general MAX template inline const T& max(const T& a, const T& b) diff --git a/extra/yassl/taocrypt/src/aes.cpp b/extra/yassl/taocrypt/src/aes.cpp index ee4c7a6e8a1..3fcf80ac202 100644 --- a/extra/yassl/taocrypt/src/aes.cpp +++ b/extra/yassl/taocrypt/src/aes.cpp @@ -109,10 +109,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[3]; rk[4] = rk[0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; @@ -128,10 +128,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[ 5]; rk[ 6] = rk[ 0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 8] = rk[ 2] ^ rk[ 7]; @@ -149,10 +149,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[ 7]; rk[ 8] = rk[ 0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - 
(Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; rk[10] = rk[ 2] ^ rk[ 9]; @@ -161,10 +161,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) break; temp = rk[11]; rk[12] = rk[ 4] ^ - (Te4[GETBYTE(temp, 3)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 0)] & 0x000000ff); + (Te2[GETBYTE(temp, 3)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 0)] & 0x000000ff); rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; rk[15] = rk[ 7] ^ rk[14]; @@ -191,25 +191,25 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) for (i = 1; i < rounds_; i++) { rk += 4; rk[0] = - Td0[Te4[GETBYTE(rk[0], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[0], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[0], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[0], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[0], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[0], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[0], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[0], 0)] & 0xff]; rk[1] = - Td0[Te4[GETBYTE(rk[1], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[1], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[1], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[1], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[1], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[1], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[1], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[1], 0)] & 0xff]; rk[2] = - Td0[Te4[GETBYTE(rk[2], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[2], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[2], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[2], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[2], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[2], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[2], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[2], 0)] & 0xff]; rk[3] = - 
Td0[Te4[GETBYTE(rk[3], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[3], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[3], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[3], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[3], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[3], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[3], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[3], 0)] & 0xff]; } } } @@ -244,6 +244,7 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock, s2 ^= rk[2]; s3 ^= rk[3]; + s0 |= PreFetchTe(); /* * Nr - 1 full rounds: */ @@ -312,28 +313,28 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock, */ s0 = - (Te4[GETBYTE(t0, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t3, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t0, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t1, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t2, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t3, 0)] & 0x000000ff) ^ rk[0]; s1 = - (Te4[GETBYTE(t1, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t0, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t1, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t2, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t3, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t0, 0)] & 0x000000ff) ^ rk[1]; s2 = - (Te4[GETBYTE(t2, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t1, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t2, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t3, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t0, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t1, 0)] & 0x000000ff) ^ rk[2]; s3 = - (Te4[GETBYTE(t3, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t2, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t3, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t0, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t1, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t2, 0)] & 0x000000ff) ^ rk[3]; @@ -358,6 +359,8 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock, s2 ^= rk[2]; s3 ^= rk[3]; + s0 |= PreFetchTd(); + /* * Nr - 1 
full rounds: */ @@ -423,29 +426,32 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock, * apply last round and * map cipher state to byte array block: */ + + t0 |= PreFetchCTd4(); + s0 = - (Td4[GETBYTE(t0, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t1, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t0, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t3, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t2, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t1, 0)]) ^ rk[0]; s1 = - (Td4[GETBYTE(t1, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t2, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t1, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t0, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t3, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t2, 0)]) ^ rk[1]; s2 = - (Td4[GETBYTE(t2, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t3, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t2, 3)] << 24 ) ^ + ((word32)CTd4[GETBYTE(t1, 2)] << 16 ) ^ + ((word32)CTd4[GETBYTE(t0, 1)] << 8 ) ^ + ((word32)CTd4[GETBYTE(t3, 0)]) ^ rk[2]; s3 = - (Td4[GETBYTE(t3, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t0, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t3, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t2, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t1, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t0, 0)]) ^ rk[3]; gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3); @@ -1826,18 +1832,52 @@ const word32 AES::Td[5][256] = { } }; +const byte AES::CTd4[256] = +{ + 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, + 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, + 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, + 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, + 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, + 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, + 0x08U, 
0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, + 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, + 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, + 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, + 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, + 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, + 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, + 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, + 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, + 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, + 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, + 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, + 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, + 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, + 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, + 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, + 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, + 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, + 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, + 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, + 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, + 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, + 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, + 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, + 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, + 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, +}; + const word32* AES::Te0 = AES::Te[0]; const word32* AES::Te1 = AES::Te[1]; const word32* AES::Te2 = AES::Te[2]; const word32* AES::Te3 = AES::Te[3]; -const word32* AES::Te4 = AES::Te[4]; const word32* AES::Td0 = AES::Td[0]; const word32* AES::Td1 = AES::Td[1]; const word32* AES::Td2 = AES::Td[2]; const word32* AES::Td3 = AES::Td[3]; -const word32* AES::Td4 = AES::Td[4]; diff --git a/extra/yassl/taocrypt/src/asn.cpp b/extra/yassl/taocrypt/src/asn.cpp index a210d805452..7ff3c7167d2 100644 --- 
a/extra/yassl/taocrypt/src/asn.cpp +++ b/extra/yassl/taocrypt/src/asn.cpp @@ -1209,17 +1209,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz) } word32 rLen = GetLength(source); if (rLen != 20) { - if (rLen == 21) { // zero at front, eat + while (rLen > 20 && source.remaining() > 0) { // zero's at front, eat source.next(); --rLen; } - else if (rLen == 19) { // add zero to front so 20 bytes + if (rLen < 20) { // add zero's to front so 20 bytes + word32 tmpLen = rLen; + while (tmpLen < 20) { decoded[0] = 0; decoded++; + tmpLen++; } - else { - source.SetError(DSA_SZ_E); - return 0; } } memcpy(decoded, source.get_buffer() + source.get_index(), rLen); @@ -1232,17 +1232,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz) } word32 sLen = GetLength(source); if (sLen != 20) { - if (sLen == 21) { - source.next(); // zero at front, eat + while (sLen > 20 && source.remaining() > 0) { + source.next(); // zero's at front, eat --sLen; } - else if (sLen == 19) { - decoded[rLen] = 0; // add zero to front so 20 bytes + if (sLen < 20) { // add zero's to front so 20 bytes + word32 tmpLen = sLen; + while (tmpLen < 20) { + decoded[rLen] = 0; decoded++; + tmpLen++; } - else { - source.SetError(DSA_SZ_E); - return 0; } } memcpy(decoded + rLen, source.get_buffer() + source.get_index(), sLen); diff --git a/extra/yassl/taocrypt/src/dsa.cpp b/extra/yassl/taocrypt/src/dsa.cpp index bf116d3e48d..b19fed9235b 100644 --- a/extra/yassl/taocrypt/src/dsa.cpp +++ b/extra/yassl/taocrypt/src/dsa.cpp @@ -172,6 +172,7 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig, const Integer& q = key_.GetSubGroupOrder(); const Integer& g = key_.GetSubGroupGenerator(); const Integer& x = key_.GetPrivatePart(); + byte* tmpPtr = sig; // initial signature output Integer k(rng, 1, q - 1); @@ -187,22 +188,23 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig, return -1; int rSz = r_.ByteCount(); + int tmpSz = rSz; - if (rSz == 19) { - sig[0] = 
0; - sig++; + while (tmpSz++ < SHA::DIGEST_SIZE) { + *sig++ = 0; } r_.Encode(sig, rSz); + sig = tmpPtr + SHA::DIGEST_SIZE; // advance sig output to s int sSz = s_.ByteCount(); + tmpSz = sSz; - if (sSz == 19) { - sig[rSz] = 0; - sig++; + while (tmpSz++ < SHA::DIGEST_SIZE) { + *sig++ = 0; } - s_.Encode(sig + rSz, sSz); + s_.Encode(sig, sSz); return 40; } diff --git a/extra/yassl/taocrypt/test/test.cpp b/extra/yassl/taocrypt/test/test.cpp index a7d5cb3e8af..fc1f0e8762d 100644 --- a/extra/yassl/taocrypt/test/test.cpp +++ b/extra/yassl/taocrypt/test/test.cpp @@ -1277,6 +1277,9 @@ int dsa_test() if (!verifier.Verify(digest, decoded)) return -90; + if (!verifier.Verify(digest, signature)) + return -91; + return 0; } diff --git a/extra/yassl/testsuite/test.hpp b/extra/yassl/testsuite/test.hpp index 5c9dc7ce117..e2e44c24027 100644 --- a/extra/yassl/testsuite/test.hpp +++ b/extra/yassl/testsuite/test.hpp @@ -22,7 +22,6 @@ #define yaSSL_TEST_HPP #include "runtime.hpp" -#include "openssl/ssl.h" /* openssl compatibility test */ #include "error.hpp" #include #include @@ -56,6 +55,7 @@ #endif #define SOCKET_T int #endif /* _WIN32 */ +#include "openssl/ssl.h" /* openssl compatibility test */ #ifdef _MSC_VER From 7497ebf8a49bfe30bb4110f2ac20a30f804b7946 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 29 Sep 2016 10:16:24 +0200 Subject: [PATCH 58/96] mysqld_safe: close stdout and stderr when they're not needed anymore. Helps when daemonizing it from mysql.init --- scripts/mysqld_safe.sh | 6 +++++- support-files/mysql.server.sh | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index 7f18abb3dc2..7cadce725d1 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -620,6 +620,10 @@ else logging=syslog fi +# close stdout and stderr, everything goes to $logging now +exec 1>&- +exec 2>&- + USER_OPTION="" if test -w / -o "$USER" = "root" then @@ -650,7 +654,7 @@ if [ ! 
-d $mysql_unix_port_dir ] then if ! `mkdir -p $mysql_unix_port_dir` then - echo "Fatal error Can't create database directory '$mysql_unix_port'" + log_error "Fatal error Can't create database directory '$mysql_unix_port'" exit 1 fi chown $user $mysql_unix_port_dir diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index e5c8814f930..d4fff33af13 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -308,7 +308,7 @@ case "$mode" in then # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. - $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args >/dev/null & + $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" $other_args & wait_for_ready; return_value=$? # Make lock for RedHat / SuSE From 9b20d606fb1afd0327356e7c78c2aea774dec3d4 Mon Sep 17 00:00:00 2001 From: Olivier Bertrand Date: Wed, 5 Oct 2016 23:44:54 +0200 Subject: [PATCH 59/96] - Fix MDEV-10948. Syntax error on quoted JDBC tables. Was because the quoting character was always '"' instead of being retrieve from the JDBC source. 
modified: storage/connect/JdbcInterface.java modified: storage/connect/jdbconn.cpp modified: storage/connect/tabjdbc.cpp --- storage/connect/JdbcInterface.java | 12 ++++++++++++ storage/connect/jdbconn.cpp | 15 +++++++++++++++ storage/connect/tabjdbc.cpp | 19 +++++++++++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/storage/connect/JdbcInterface.java b/storage/connect/JdbcInterface.java index f765052915d..e339c989113 100644 --- a/storage/connect/JdbcInterface.java +++ b/storage/connect/JdbcInterface.java @@ -340,6 +340,18 @@ public class JdbcInterface { return m; } // end of GetMaxValue + public String GetQuoteString() { + String qs = null; + + try { + qs = dbmd.getIdentifierQuoteString(); + } catch(SQLException se) { + SetErrmsg(se); + } // end try/catch + + return qs; + } // end of GetQuoteString + public int GetColumns(String[] parms) { int ncol = -1; diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp index 952847507a0..229ade53ad1 100644 --- a/storage/connect/jdbconn.cpp +++ b/storage/connect/jdbconn.cpp @@ -1011,6 +1011,21 @@ int JDBConn::Open(PJPARM sop) return RC_FX; } // endif Msg + jmethodID qcid = nullptr; + + if (!gmID(g, qcid, "GetQuoteString", "()Ljava/lang/String;")) { + jstring s = (jstring)env->CallObjectMethod(job, qcid); + + if (s != nullptr) { + char *qch = (char*)env->GetStringUTFChars(s, (jboolean)false); + m_IDQuoteChar[0] = *qch; + } else { + s = (jstring)env->CallObjectMethod(job, errid); + Msg = (char*)env->GetStringUTFChars(s, (jboolean)false); + } // endif s + + } // endif qcid + if (gmID(g, typid, "ColumnType", "(ILjava/lang/String;)I")) return RC_FX; else diff --git a/storage/connect/tabjdbc.cpp b/storage/connect/tabjdbc.cpp index 86fd831b262..e398523892f 100644 --- a/storage/connect/tabjdbc.cpp +++ b/storage/connect/tabjdbc.cpp @@ -686,6 +686,9 @@ bool TDBJDBC::MakeInsert(PGLOBAL g) else Prepared = true; + if (trace) + htrc("Insert=%s\n", Query->GetStr()); + return false; } // end of 
MakeInsert @@ -733,17 +736,18 @@ bool TDBJDBC::MakeCommand(PGLOBAL g) // If so, it must be quoted in the original query strlwr(strcat(strcat(strcpy(name, " "), Name), " ")); - if (!strstr(" update delete low_priority ignore quick from ", name)) - strlwr(strcpy(name, Name)); // Not a keyword - else + if (strstr(" update delete low_priority ignore quick from ", name)) { strlwr(strcat(strcat(strcpy(name, qc), Name), qc)); + k += 2; + } else + strlwr(strcpy(name, Name)); // Not a keyword if ((p = strstr(qrystr, name))) { for (i = 0; i < p - qrystr; i++) stmt[i] = (Qrystr[i] == '`') ? *qc : Qrystr[i]; stmt[i] = 0; - k = i + (int)strlen(Name); + k += i + (int)strlen(Name); if (qtd && *(p-1) == ' ') strcat(strcat(strcat(stmt, qc), TableName), qc); @@ -765,6 +769,9 @@ bool TDBJDBC::MakeCommand(PGLOBAL g) return NULL; } // endif p + if (trace) + htrc("Command=%s\n", stmt); + Query = new(g)STRING(g, 0, stmt); return (!Query->GetSize()); } // end of MakeCommand @@ -1214,6 +1221,10 @@ int TDBJDBC::WriteDB(PGLOBAL g) } // endif oom Query->RepLast(')'); + + if (trace > 1) + htrc("Inserting: %s\n", Query->GetStr()); + rc = Jcp->ExecuteUpdate(Query->GetStr()); Query->Truncate(len); // Restore query From 6010a27c87785643f8880d19c0dced3b724c54da Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 13 Oct 2016 12:23:16 +0200 Subject: [PATCH 60/96] 5.5.52-38.3 --- storage/xtradb/btr/btr0btr.c | 4 +- storage/xtradb/handler/ha_innodb.cc | 108 +++++++++++++++++++++++----- storage/xtradb/include/buf0buf.h | 12 ++++ storage/xtradb/include/buf0buf.ic | 14 ++++ storage/xtradb/include/srv0srv.h | 8 +++ storage/xtradb/include/univ.i | 2 +- storage/xtradb/log/log0log.c | 8 ++- storage/xtradb/log/log0online.c | 12 ++-- storage/xtradb/log/log0recv.c | 2 +- storage/xtradb/mach/mach0data.c | 13 +++- storage/xtradb/srv/srv0srv.c | 42 +++++++---- 11 files changed, 179 insertions(+), 46 deletions(-) diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 
dec42f27d3b..0c429363789 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -76,7 +76,7 @@ btr_corruption_report( buf_block_get_zip_size(block), BUF_PAGE_PRINT_NO_CRASH); } - buf_page_print(buf_block_get_frame(block), 0, 0); + buf_page_print(buf_nonnull_block_get_frame(block), 0, 0); } #ifndef UNIV_HOTBACKUP @@ -1077,7 +1077,7 @@ btr_get_size( SRV_CORRUPT_TABLE_CHECK(root, { mtr_commit(mtr); - return(0); + return(ULINT_UNDEFINED); }); if (flag == BTR_N_LEAF_PAGES) { diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 896b27a2047..010aec1ea0d 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -475,6 +475,19 @@ innobase_is_fake_change( THD* thd); /*!< in: MySQL thread handle of the user for whom the transaction is being committed */ +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list); /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code @@ -2710,6 +2723,7 @@ innobase_init( innobase_hton->purge_changed_page_bitmaps = innobase_purge_changed_page_bitmaps; innobase_hton->is_fake_change = innobase_is_fake_change; + innobase_hton->get_parent_fk_list = innobase_get_parent_fk_list; ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); @@ -9721,7 +9735,14 @@ ha_innobase::check( prebuilt->select_lock_type = LOCK_NONE; - if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + bool check_result + = row_check_index_for_mysql(prebuilt, index, &n_rows); + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!(index->type & DICT_CLUSTERED)) { + check_result = false; + }); + if (!check_result) { 
innobase_format_name( index_name, sizeof index_name, index->name, TRUE); @@ -10057,6 +10078,73 @@ get_foreign_key_info( return(pf_key_info); } +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys */ +static +void +fill_foreign_key_list(THD* thd, + const dict_table_t* table, + List* f_key_list) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + for (dict_foreign_t* foreign + = UT_LIST_GET_FIRST(table->referenced_list); + foreign != NULL; + foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) { + + FOREIGN_KEY_INFO* pf_key_info + = get_foreign_key_info(thd, foreign); + if (pf_key_info) { + f_key_list->push_back(pf_key_info); + } + } +} + +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list) +{ + ut_a(strlen(path) <= FN_REFLEN); + char norm_name[FN_REFLEN + 1]; + normalize_table_name(norm_name, path); + + trx_t* parent_trx = check_trx_exists(thd); + parent_trx->op_info = "getting list of referencing foreign keys"; + trx_search_latch_release_if_reserved(parent_trx); + + mutex_enter(&dict_sys->mutex); + + dict_table_t* table + = dict_table_get_low(norm_name, + static_cast( + DICT_ERR_IGNORE_INDEX_ROOT + | DICT_ERR_IGNORE_CORRUPT)); + if (!table) { + mutex_exit(&dict_sys->mutex); + return(HA_ERR_NO_SUCH_TABLE); + } + + fill_foreign_key_list(thd, table, f_key_list); + + mutex_exit(&dict_sys->mutex); + parent_trx->op_info = ""; + return(0); +} + /*******************************************************************//** Gets the list of foreign keys in this table. 
@return always 0, that is, always succeeds */ @@ -10105,9 +10193,6 @@ ha_innobase::get_parent_foreign_key_list( THD* thd, /*!< in: user thread handle */ List* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - ut_a(prebuilt != NULL); update_thd(ha_thd()); @@ -10116,16 +10201,7 @@ ha_innobase::get_parent_foreign_key_list( trx_search_latch_release_if_reserved(prebuilt->trx); mutex_enter(&(dict_sys->mutex)); - - for (foreign = UT_LIST_GET_FIRST(prebuilt->table->referenced_list); - foreign != NULL; - foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) { - pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } - + fill_foreign_key_list(thd, prebuilt->table, f_key_list); mutex_exit(&(dict_sys->mutex)); prebuilt->trx->op_info = ""; @@ -12539,7 +12615,6 @@ innodb_track_changed_pages_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming bool */ { - static bool enabled_on_startup = false; long long intbuf = 0; if (value->val_int(value, &intbuf)) { @@ -12547,8 +12622,7 @@ innodb_track_changed_pages_validate( return 1; } - if (srv_track_changed_pages || enabled_on_startup) { - enabled_on_startup = true; + if (srv_redo_log_thread_started) { *reinterpret_cast(save) = static_cast(intbuf); return 0; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 77025c16373..23692c92c09 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1110,8 +1110,20 @@ buf_block_get_frame( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ __attribute__((pure)); + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. 
+@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( + const buf_block_t* block) /*!< in: pointer to the control block */ + __attribute__((pure)); + #else /* UNIV_DEBUG */ # define buf_block_get_frame(block) (block ? (block)->frame : 0) +# define buf_nonnull_block_get_frame(block) ((block)->frame) #endif /* UNIV_DEBUG */ /*********************************************************************//** Gets the space id of a block. diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index f214112c7ce..fae44a1ac4a 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -718,6 +718,19 @@ buf_block_get_frame( { SRV_CORRUPT_TABLE_CHECK(block, return(0);); + return(buf_nonnull_block_get_frame(block)); +} + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( +/*========================*/ + const buf_block_t* block) /*!< in: pointer to the control block */ +{ switch (buf_block_get_state(block)) { case BUF_BLOCK_ZIP_FREE: case BUF_BLOCK_ZIP_PAGE: @@ -739,6 +752,7 @@ buf_block_get_frame( ok: return((buf_frame_t*) block->frame); } + #endif /* UNIV_DEBUG */ /*********************************************************************//** diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 3ccad0640b6..12ab9d9ed87 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -74,6 +74,11 @@ extern os_event_t srv_checkpoint_completed_event; that the (slow) shutdown may proceed */ extern os_event_t srv_redo_log_thread_finished_event; +/** Whether the redo log tracker thread has been started. 
Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ +extern my_bool srv_redo_log_thread_started; + /* If the last data file is auto-extended, we add this many pages to it at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ @@ -141,6 +146,9 @@ extern char* srv_doublewrite_file; extern ibool srv_recovery_stats; +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ extern my_bool srv_track_changed_pages; extern ib_uint64_t srv_max_bitmap_file_size; diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index b158a12027f..cc589166f8d 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -64,7 +64,7 @@ component, i.e. we show M.N.P as M.N */ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 38.0 +#define PERCONA_INNODB_VERSION 38.3 #endif #define INNODB_VERSION_STR MYSQL_SERVER_VERSION diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index b4b48a065f9..49ee8407b2c 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -3326,6 +3326,8 @@ logs_empty_and_mark_files_at_shutdown(void) algorithm only works if the server is idle at shutdown */ srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; + + srv_wake_purge_thread(); loop: os_thread_sleep(100000); @@ -3499,7 +3501,7 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Wake the log tracking thread which will then immediatelly quit because of srv_shutdown_state value */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_set(srv_checkpoint_completed_event); os_event_wait(srv_redo_log_thread_finished_event); } @@ -3576,7 +3578,7 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Signal the log following thread to quit */ - if (srv_track_changed_pages) { + if 
(srv_redo_log_thread_started) { os_event_set(srv_checkpoint_completed_event); } @@ -3600,7 +3602,7 @@ loop: fil_flush_file_spaces(FIL_TABLESPACE); - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_wait(srv_redo_log_thread_finished_event); } diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c index d0127488f67..fa2c8b882bf 100644 --- a/storage/xtradb/log/log0online.c +++ b/storage/xtradb/log/log0online.c @@ -1813,7 +1813,7 @@ log_online_purge_changed_page_bitmaps( lsn = IB_ULONGLONG_MAX; } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { /* User requests might happen with both enabled and disabled tracking */ mutex_enter(&log_bmp_sys->mutex); @@ -1821,13 +1821,13 @@ log_online_purge_changed_page_bitmaps( if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, IB_ULONGLONG_MAX)) { - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { mutex_exit(&log_bmp_sys->mutex); } return TRUE; } - if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) { + if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) { /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); @@ -1858,7 +1858,7 @@ log_online_purge_changed_page_bitmaps( } } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { if (lsn > log_bmp_sys->end_lsn) { ib_uint64_t new_file_lsn; if (lsn == IB_ULONGLONG_MAX) { @@ -1869,9 +1869,7 @@ log_online_purge_changed_page_bitmaps( new_file_lsn = log_bmp_sys->end_lsn; } if (!log_online_rotate_bitmap_file(new_file_lsn)) { - /* If file create failed, signal the log - tracking thread to quit next time it wakes - up. 
*/ + /* If file create failed, stop log tracking */ srv_track_changed_pages = FALSE; } } diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 6c2a121967e..527e7b3af0f 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -3015,7 +3015,7 @@ recv_recovery_from_checkpoint_start_func( ib_uint64_t checkpoint_lsn; ib_uint64_t checkpoint_no; ib_uint64_t old_scanned_lsn; - ib_uint64_t group_scanned_lsn; + ib_uint64_t group_scanned_lsn = 0; ib_uint64_t contiguous_lsn; #ifdef UNIV_LOG_ARCHIVE ib_uint64_t archived_lsn; diff --git a/storage/xtradb/mach/mach0data.c b/storage/xtradb/mach/mach0data.c index 95b135b0954..00378f036c9 100644 --- a/storage/xtradb/mach/mach0data.c +++ b/storage/xtradb/mach/mach0data.c @@ -56,7 +56,18 @@ mach_parse_compressed( *val = flag; return(ptr + 1); - } else if (flag < 0xC0UL) { + } + + /* Workaround GCC bug + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673: + the compiler moves mach_read_from_4 right to the beginning of the + function, causing and out-of-bounds read if we are reading a short + integer close to the end of buffer. */ +#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__) + asm volatile("": : :"memory"); +#endif + + if (flag < 0xC0UL) { if (end_ptr < ptr + 2) { return(NULL); } diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 26ad32cb1d1..0acce91f2c4 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -179,6 +179,9 @@ UNIV_INTERN char* srv_doublewrite_file = NULL; UNIV_INTERN ibool srv_recovery_stats = FALSE; +/** Whether the redo log tracking is currently enabled. 
Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ UNIV_INTERN my_bool srv_track_changed_pages = FALSE; UNIV_INTERN ib_uint64_t srv_max_bitmap_file_size = 100 * 1024 * 1024; @@ -809,6 +812,11 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event; UNIV_INTERN os_event_t srv_redo_log_thread_finished_event; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ +UNIV_INTERN my_bool srv_redo_log_thread_started = FALSE; + UNIV_INTERN srv_sys_t* srv_sys = NULL; /* padding to prevent other memory update hotspots from residing on @@ -3179,18 +3187,15 @@ srv_redo_log_follow_thread( #endif my_thread_init(); + srv_redo_log_thread_started = TRUE; do { os_event_wait(srv_checkpoint_completed_event); os_event_reset(srv_checkpoint_completed_event); -#ifdef UNIV_DEBUG - if (!srv_track_changed_pages) { - continue; - } -#endif + if (srv_track_changed_pages + && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { - if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { if (!log_online_follow_redo_log()) { /* TODO: sync with I_S log tracking status? */ fprintf(stderr, @@ -3206,6 +3211,7 @@ srv_redo_log_follow_thread( srv_track_changed_pages = FALSE; log_online_read_shutdown(); os_event_set(srv_redo_log_thread_finished_event); + srv_redo_log_thread_started = FALSE; /* Defensive, not required */ my_thread_end(); os_thread_exit(NULL); @@ -3327,7 +3333,7 @@ srv_master_do_purge(void) ut_ad(!mutex_own(&kernel_mutex)); - ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)); + ut_a(srv_n_purge_threads == 0); do { /* Check for shutdown and change in purge config. 
*/ @@ -3848,7 +3854,7 @@ retry_flush_batch: /* Flush logs if needed */ srv_sync_log_buffer_in_background(); - if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) { + if (srv_n_purge_threads == 0) { srv_main_thread_op_info = "master purging"; srv_master_do_purge(); @@ -3926,7 +3932,7 @@ background_loop: } } - if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) { + if (srv_n_purge_threads == 0) { srv_main_thread_op_info = "master purging"; srv_master_do_purge(); @@ -4142,9 +4148,10 @@ srv_purge_thread( We peek at the history len without holding any mutex because in the worst case we will end up waiting for the next purge event. */ - if (trx_sys->rseg_history_len < srv_purge_batch_size - || (n_total_purged == 0 - && retries >= TRX_SYS_N_RSEGS)) { + if (srv_shutdown_state == SRV_SHUTDOWN_NONE + && (trx_sys->rseg_history_len < srv_purge_batch_size + || (n_total_purged == 0 + && retries >= TRX_SYS_N_RSEGS))) { mutex_enter(&kernel_mutex); @@ -4159,8 +4166,12 @@ srv_purge_thread( /* Check for shutdown and whether we should do purge at all. 
*/ if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND - || srv_shutdown_state != 0 - || srv_fast_shutdown) { + || (srv_shutdown_state != SRV_SHUTDOWN_NONE + && srv_fast_shutdown) + || (srv_shutdown_state != SRV_SHUTDOWN_NONE + && srv_fast_shutdown == 0 + && n_total_purged == 0 + && retries >= TRX_SYS_N_RSEGS)) { break; } @@ -4183,6 +4194,9 @@ srv_purge_thread( srv_sync_log_buffer_in_background(); + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) + continue; + cur_time = ut_time_ms(); if (next_itr_time > cur_time) { os_thread_sleep(ut_min(1000000, From 383007c75d6ef5043fa5781956a6a02b24e2b79e Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 13 Oct 2016 21:35:01 +0200 Subject: [PATCH 61/96] mysql cli: fix USE command quoting * use proper sql quoting rules for USE, while preserving as much of historical behavior as possible * short commands (\u) behave as before --- client/mysql.cc | 56 +++++++++++++++++++++------------------ mysql-test/r/mysql.result | 8 ++++++ mysql-test/t/mysql.test | 8 ++++++ 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/client/mysql.cc b/client/mysql.cc index 9d255b55430..9b1999f2c38 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -245,7 +245,8 @@ static void end_pager(); static void init_tee(const char *); static void end_tee(); static const char* construct_prompt(); -static char *get_arg(char *line, my_bool get_next_arg); +enum get_arg_mode { CHECK, GET, GET_NEXT}; +static char *get_arg(char *line, get_arg_mode mode); static void init_username(); static void add_int_to_prompt(int toadd); static int get_result_width(MYSQL_RES *res); @@ -2223,7 +2224,7 @@ static COMMANDS *find_command(char *name) if (!my_strnncoll(&my_charset_latin1, (uchar*) name, len, (uchar*) commands[i].name, len) && (commands[i].name[len] == '\0') && - (!end || commands[i].takes_params)) + (!end || (commands[i].takes_params && get_arg(name, CHECK)))) { index= i; break; @@ -3143,7 +3144,7 @@ com_charset(String *buffer __attribute__((unused)), 
char *line) char buff[256], *param; CHARSET_INFO * new_cs; strmake_buf(buff, line); - param= get_arg(buff, 0); + param= get_arg(buff, GET); if (!param || !*param) { return put_info("Usage: \\C charset_name | charset charset_name", @@ -4228,12 +4229,12 @@ com_connect(String *buffer, char *line) #ifdef EXTRA_DEBUG tmp[1]= 0; #endif - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (tmp && *tmp) { my_free(current_db); current_db= my_strdup(tmp, MYF(MY_WME)); - tmp= get_arg(buff, 1); + tmp= get_arg(buff, GET_NEXT); if (tmp) { my_free(current_host); @@ -4336,7 +4337,7 @@ com_delimiter(String *buffer __attribute__((unused)), char *line) char buff[256], *tmp; strmake_buf(buff, line); - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (!tmp || !*tmp) { @@ -4367,7 +4368,7 @@ com_use(String *buffer __attribute__((unused)), char *line) bzero(buff, sizeof(buff)); strmake_buf(buff, line); - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (!tmp || !*tmp) { put_info("USE must be followed by a database name", INFO_ERROR); @@ -4452,23 +4453,22 @@ com_nowarnings(String *buffer __attribute__((unused)), } /* - Gets argument from a command on the command line. If get_next_arg is - not defined, skips the command and returns the first argument. The - line is modified by adding zero to the end of the argument. If - get_next_arg is defined, then the function searches for end of string - first, after found, returns the next argument and adds zero to the - end. If you ever wish to use this feature, remember to initialize all - items in the array to zero first. + Gets argument from a command on the command line. If mode is not GET_NEXT, + skips the command and returns the first argument. The line is modified by + adding zero to the end of the argument. If mode is GET_NEXT, then the + function searches for end of string first, after found, returns the next + argument and adds zero to the end. 
If you ever wish to use this feature, + remember to initialize all items in the array to zero first. */ -char *get_arg(char *line, my_bool get_next_arg) +static char *get_arg(char *line, get_arg_mode mode) { char *ptr, *start; - my_bool quoted= 0, valid_arg= 0; + bool short_cmd= false; char qtype= 0; ptr= line; - if (get_next_arg) + if (mode == GET_NEXT) { for (; *ptr; ptr++) ; if (*(ptr + 1)) @@ -4479,7 +4479,7 @@ char *get_arg(char *line, my_bool get_next_arg) /* skip leading white spaces */ while (my_isspace(charset_info, *ptr)) ptr++; - if (*ptr == '\\') // short command was used + if ((short_cmd= *ptr == '\\')) // short command was used ptr+= 2; else while (*ptr &&!my_isspace(charset_info, *ptr)) // skip command @@ -4492,24 +4492,28 @@ char *get_arg(char *line, my_bool get_next_arg) if (*ptr == '\'' || *ptr == '\"' || *ptr == '`') { qtype= *ptr; - quoted= 1; ptr++; } for (start=ptr ; *ptr; ptr++) { - if (*ptr == '\\' && ptr[1]) // escaped character + if ((*ptr == '\\' && ptr[1]) || // escaped character + (!short_cmd && qtype && *ptr == qtype && ptr[1] == qtype)) // quote { - // Remove the backslash - strmov_overlapp(ptr, ptr+1); + // Remove (or skip) the backslash (or a second quote) + if (mode != CHECK) + strmov_overlapp(ptr, ptr+1); + else + ptr++; } - else if ((!quoted && *ptr == ' ') || (quoted && *ptr == qtype)) + else if (*ptr == (qtype ? qtype : ' ')) { - *ptr= 0; + qtype= 0; + if (mode != CHECK) + *ptr= 0; break; } } - valid_arg= ptr != start; - return valid_arg ? start : NullS; + return ptr != start && !qtype ? 
start : NullS; } diff --git a/mysql-test/r/mysql.result b/mysql-test/r/mysql.result index cb705d285fe..dd0129df0d9 100644 --- a/mysql-test/r/mysql.result +++ b/mysql-test/r/mysql.result @@ -512,6 +512,14 @@ DROP DATABASE connected_db; create database `aa``bb````cc`; DATABASE() aa`bb``cc +DATABASE() +test +DATABASE() +aa`bb``cc +DATABASE() +test +DATABASE() +aa`bb``cc drop database `aa``bb````cc`; a >>\ndelimiter\n<< diff --git a/mysql-test/t/mysql.test b/mysql-test/t/mysql.test index 6281bb5f4c1..d59083d66b0 100644 --- a/mysql-test/t/mysql.test +++ b/mysql-test/t/mysql.test @@ -586,8 +586,16 @@ DROP DATABASE connected_db; # USE and names with backticks # --write_file $MYSQLTEST_VARDIR/tmp/backticks.sql +\u aa`bb``cc +SELECT DATABASE(); +USE test +SELECT DATABASE(); USE aa`bb``cc SELECT DATABASE(); +USE test +SELECT DATABASE(); +USE `aa``bb````cc` +SELECT DATABASE(); EOF create database `aa``bb````cc`; --exec $MYSQL < $MYSQLTEST_VARDIR/tmp/backticks.sql From 01b39b7b0730102b88d8ea43ec719a75e9316a1e Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 13 Oct 2016 20:58:08 +0200 Subject: [PATCH 62/96] mysqltest: don't eat new lines in --exec pass them through as is --- client/mysqltest.cc | 4 ---- mysql-test/r/mysql_not_windows.result | 6 ++++++ mysql-test/r/mysqltest.result | 6 ------ mysql-test/t/mysql_not_windows.test | 9 +++++++++ mysql-test/t/mysqltest.test | 9 --------- 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 3652d1a40e2..acb9e8b1e0c 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -3349,10 +3349,6 @@ void do_exec(struct st_command *command) #endif #endif - /* exec command is interpreted externally and will not take newlines */ - while(replace(&ds_cmd, "\n", 1, " ", 1) == 0) - ; - DBUG_PRINT("info", ("Executing '%s' as '%s'", command->first_argument, ds_cmd.str)); diff --git a/mysql-test/r/mysql_not_windows.result b/mysql-test/r/mysql_not_windows.result index 
d5670a1a9ca..1df62d9a12d 100644 --- a/mysql-test/r/mysql_not_windows.result +++ b/mysql-test/r/mysql_not_windows.result @@ -3,3 +3,9 @@ a 1 End of tests +1 +1 +2 +2 +X +3 diff --git a/mysql-test/r/mysqltest.result b/mysql-test/r/mysqltest.result index 865c8d7077b..0ebef585974 100644 --- a/mysql-test/r/mysqltest.result +++ b/mysql-test/r/mysqltest.result @@ -269,12 +269,6 @@ source database echo message echo message mysqltest: At line 1: Missing argument in exec -1 -1 -2 -2 -X -3 MySQL "MySQL" MySQL: The diff --git a/mysql-test/t/mysql_not_windows.test b/mysql-test/t/mysql_not_windows.test index 66853677f7b..591de74cbbf 100644 --- a/mysql-test/t/mysql_not_windows.test +++ b/mysql-test/t/mysql_not_windows.test @@ -13,3 +13,12 @@ --echo --echo End of tests + +# Multi-line exec +exec $MYSQL \ + test -e "select 1"; +exec $MYSQL test -e "select + 2"; +let $query = select 3 + as X; +exec $MYSQL test -e "$query"; diff --git a/mysql-test/t/mysqltest.test b/mysql-test/t/mysqltest.test index ffbec36873e..6470ede4f14 100644 --- a/mysql-test/t/mysqltest.test +++ b/mysql-test/t/mysqltest.test @@ -741,15 +741,6 @@ echo ; --error 1 --exec echo "--exec " | $MYSQL_TEST 2>&1 -# Multi-line exec -exec $MYSQL - test -e "select 1"; -exec $MYSQL test -e "select - 2"; -let $query = select 3 - as X; -exec $MYSQL test -e "$query"; - # ---------------------------------------------------------------------------- # Test let command # ---------------------------------------------------------------------------- From 5a43a31ee81bc181eeb5ef2bf0704befa6e0594d Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 14 Oct 2016 00:33:49 +0200 Subject: [PATCH 63/96] mysqldump: comments and identifiers with new lines don't let identifiers with new lines to break a comment --- client/mysqldump.c | 60 ++++++++++----- mysql-test/r/mysqldump-nl.result | 126 +++++++++++++++++++++++++++++++ mysql-test/t/mysqldump-nl.test | 38 ++++++++++ 3 files changed, 207 insertions(+), 17 deletions(-) create mode 100644 
mysql-test/r/mysqldump-nl.result create mode 100644 mysql-test/t/mysqldump-nl.test diff --git a/client/mysqldump.c b/client/mysqldump.c index 16b39b77cf1..32c350d3078 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -547,9 +547,7 @@ static int dump_all_tablespaces(); static int dump_tablespaces_for_tables(char *db, char **table_names, int tables); static int dump_tablespaces_for_databases(char** databases); static int dump_tablespaces(char* ts_where); -static void print_comment(FILE *sql_file, my_bool is_error, const char *format, - ...); - +static void print_comment(FILE *, my_bool, const char *, ...); /* Print the supplied message if in verbose mode @@ -627,6 +625,30 @@ static void short_usage(FILE *f) } +/** returns a string fixed to be safely printed inside a -- comment + + that is, any new line in it gets prefixed with -- +*/ +static const char *fix_for_comment(const char *ident) +{ + static char buf[1024]; + char c, *s= buf; + + while ((c= *s++= *ident++)) + { + if (s >= buf + sizeof(buf) - 10) + { + strmov(s, "..."); + break; + } + if (c == '\n') + s= strmov(s, "-- "); + } + + return buf; +} + + static void write_header(FILE *sql_file, char *db_name) { if (opt_xml) @@ -649,8 +671,8 @@ static void write_header(FILE *sql_file, char *db_name) DUMP_VERSION, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); print_comment(sql_file, 0, "-- Host: %s Database: %s\n", - current_host ? current_host : "localhost", - db_name ? db_name : ""); + fix_for_comment(current_host ? current_host : "localhost"), + fix_for_comment(db_name ? 
db_name : "")); print_comment(sql_file, 0, "-- ------------------------------------------------------\n" ); @@ -2094,7 +2116,8 @@ static uint dump_events_for_db(char *db) /* nice comments */ print_comment(sql_file, 0, - "\n--\n-- Dumping events for database '%s'\n--\n", db); + "\n--\n-- Dumping events for database '%s'\n--\n", + fix_for_comment(db)); /* not using "mysql_query_with_error_report" because we may have not @@ -2307,7 +2330,8 @@ static uint dump_routines_for_db(char *db) /* nice comments */ print_comment(sql_file, 0, - "\n--\n-- Dumping routines for database '%s'\n--\n", db); + "\n--\n-- Dumping routines for database '%s'\n--\n", + fix_for_comment(db)); /* not using "mysql_query_with_error_report" because we may have not @@ -2580,11 +2604,11 @@ static uint get_table_structure(char *table, char *db, char *table_type, if (strcmp (table_type, "VIEW") == 0) /* view */ print_comment(sql_file, 0, "\n--\n-- Temporary table structure for view %s\n--\n\n", - result_table); + fix_for_comment(result_table)); else print_comment(sql_file, 0, "\n--\n-- Table structure for table %s\n--\n\n", - result_table); + fix_for_comment(result_table)); if (opt_drop) { @@ -2826,7 +2850,7 @@ static uint get_table_structure(char *table, char *db, char *table_type, print_comment(sql_file, 0, "\n--\n-- Table structure for table %s\n--\n\n", - result_table); + fix_for_comment(result_table)); if (opt_drop) fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", result_table); if (!opt_xml) @@ -3530,21 +3554,21 @@ static void dump_table(char *table, char *db) { print_comment(md_result_file, 0, "\n--\n-- Dumping data for table %s\n--\n", - result_table); + fix_for_comment(result_table)); dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM "); dynstr_append_checked(&query_string, result_table); if (where) { - print_comment(md_result_file, 0, "-- WHERE: %s\n", where); + print_comment(md_result_file, 0, "-- WHERE: %s\n", fix_for_comment(where)); 
dynstr_append_checked(&query_string, " WHERE "); dynstr_append_checked(&query_string, where); } if (order_by) { - print_comment(md_result_file, 0, "-- ORDER BY: %s\n", order_by); + print_comment(md_result_file, 0, "-- ORDER BY: %s\n", fix_for_comment(order_by)); dynstr_append_checked(&query_string, " ORDER BY "); dynstr_append_checked(&query_string, order_by); @@ -4053,7 +4077,7 @@ static int dump_tablespaces(char* ts_where) if (first) { print_comment(md_result_file, 0, "\n--\n-- Logfile group: %s\n--\n", - row[0]); + fix_for_comment(row[0])); fprintf(md_result_file, "\nCREATE"); } @@ -4122,7 +4146,8 @@ static int dump_tablespaces(char* ts_where) first= 1; if (first) { - print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", row[0]); + print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", + fix_for_comment(row[0])); fprintf(md_result_file, "\nCREATE"); } else @@ -4326,7 +4351,8 @@ static int init_dumping(char *database, int init_func(char*)) char *qdatabase= quote_name(database,quoted_database_buf,opt_quoted); print_comment(md_result_file, 0, - "\n--\n-- Current Database: %s\n--\n", qdatabase); + "\n--\n-- Current Database: %s\n--\n", + fix_for_comment(qdatabase)); /* Call the view or table specific function */ init_func(qdatabase); @@ -5356,7 +5382,7 @@ static my_bool get_view_structure(char *table, char* db) print_comment(sql_file, 0, "\n--\n-- Final view structure for view %s\n--\n\n", - result_table); + fix_for_comment(result_table)); /* Table might not exist if this view was dumped with --tab. 
*/ fprintf(sql_file, "/*!50001 DROP TABLE IF EXISTS %s*/;\n", opt_quoted_table); diff --git a/mysql-test/r/mysqldump-nl.result b/mysql-test/r/mysqldump-nl.result new file mode 100644 index 00000000000..6de439bdf3c --- /dev/null +++ b/mysql-test/r/mysqldump-nl.result @@ -0,0 +1,126 @@ +create database `mysqltest1 +1tsetlqsym`; +use `mysqltest1 +1tsetlqsym`; +create table `t1 +1t` (`foobar +raboof` int); +create view `v1 +1v` as select * from `t1 +1t`; +create procedure sp() select * from `v1 +1v`; +flush tables; +use test; + +-- +-- Current Database: `mysqltest1 +-- 1tsetlqsym` +-- + +/*!40000 DROP DATABASE IF EXISTS `mysqltest1 +1tsetlqsym`*/; + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `mysqltest1 +1tsetlqsym` /*!40100 DEFAULT CHARACTER SET latin1 */; + +USE `mysqltest1 +1tsetlqsym`; + +-- +-- Table structure for table `t1 +-- 1t` +-- + +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `t1 +1t` ( + `foobar +raboof` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `t1 +-- 1t` +-- + +-- +-- Temporary table structure for view `v1 +-- 1v` +-- + +SET @saved_cs_client = @@character_set_client; +SET character_set_client = utf8; +/*!50001 CREATE TABLE `v1 +1v` ( + `foobar +raboof` tinyint NOT NULL +) ENGINE=MyISAM */; +SET character_set_client = @saved_cs_client; + +-- +-- Dumping routines for database 'mysqltest1 +-- 1tsetlqsym' +-- +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = latin1 */ ; +/*!50003 SET character_set_results = latin1 */ ; +/*!50003 SET collation_connection = latin1_swedish_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = '' */ ; +DELIMITER ;; +CREATE 
DEFINER=`root`@`localhost` PROCEDURE `sp`() +select * from `v1 +1v` ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; + +-- +-- Current Database: `mysqltest1 +-- 1tsetlqsym` +-- + +USE `mysqltest1 +1tsetlqsym`; + +-- +-- Final view structure for view `v1 +-- 1v` +-- + +/*!50001 DROP TABLE IF EXISTS `v1 +1v`*/; +/*!50001 SET @saved_cs_client = @@character_set_client */; +/*!50001 SET @saved_cs_results = @@character_set_results */; +/*!50001 SET @saved_col_connection = @@collation_connection */; +/*!50001 SET character_set_client = latin1 */; +/*!50001 SET character_set_results = latin1 */; +/*!50001 SET collation_connection = latin1_swedish_ci */; +/*!50001 CREATE ALGORITHM=UNDEFINED */ +/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */ +/*!50001 VIEW `v1 +1v` AS select `t1 +1t`.`foobar +raboof` AS `foobar +raboof` from `t1 +1t` */; +/*!50001 SET character_set_client = @saved_cs_client */; +/*!50001 SET character_set_results = @saved_cs_results */; +/*!50001 SET collation_connection = @saved_col_connection */; +show tables from `mysqltest1 +1tsetlqsym`; +Tables_in_mysqltest1 +1tsetlqsym +t1 +1t +v1 +1v +drop database `mysqltest1 +1tsetlqsym`; diff --git a/mysql-test/t/mysqldump-nl.test b/mysql-test/t/mysqldump-nl.test new file mode 100644 index 00000000000..311996e77c3 --- /dev/null +++ b/mysql-test/t/mysqldump-nl.test @@ -0,0 +1,38 @@ +# +# New lines in identifiers +# + +# embedded server doesn't support external clients +--source include/not_embedded.inc +# cmd.exe doesn't like new lines on the command line +--source include/not_windows.inc + +create database `mysqltest1 +1tsetlqsym`; +use `mysqltest1 +1tsetlqsym`; + +create table `t1 +1t` (`foobar +raboof` int); +create view `v1 +1v` as select * from `t1 +1t`; + +create procedure sp() select * from `v1 +1v`; + 
+flush tables; +use test; + +exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1 +1tsetlqsym'; + +exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1 +1tsetlqsym' | $MYSQL; + +show tables from `mysqltest1 +1tsetlqsym`; + +drop database `mysqltest1 +1tsetlqsym`; From eac8d95ffcdea7cd31d60d273e30cb3dfec66add Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 14 Oct 2016 12:51:53 +0200 Subject: [PATCH 64/96] compilation warning after xtradb merge --- storage/xtradb/handler/ha_innodb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 386920e689d..66fcc2799bb 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -481,7 +481,7 @@ int innobase_get_parent_fk_list( THD* thd, const char* path, - List* f_key_list); + List* f_key_list) __attribute__((unused)); /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code From b7aee7dbe71cf77199e28e905469f0d9fb6d4a80 Mon Sep 17 00:00:00 2001 From: Olivier Bertrand Date: Fri, 14 Oct 2016 18:29:33 +0200 Subject: [PATCH 65/96] - Fix MDEV-10950. Null values not retrieved for numeric types. Now the null is tested using the result set getObject method. 
modified: storage/connect/JdbcInterface.java modified: storage/connect/jdbconn.cpp modified: storage/connect/jdbconn.h --- storage/connect/JdbcInterface.java | 4 +-- storage/connect/jdbconn.cpp | 39 ++++++++++++++++++++---------- storage/connect/jdbconn.h | 1 + 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/storage/connect/JdbcInterface.java b/storage/connect/JdbcInterface.java index e339c989113..34af8c4e013 100644 --- a/storage/connect/JdbcInterface.java +++ b/storage/connect/JdbcInterface.java @@ -692,11 +692,11 @@ public class JdbcInterface { return 0; } // end of TimestampField - public String ObjectField(int n, String name) { + public Object ObjectField(int n, String name) { if (rs == null) { System.out.println("No result set"); } else try { - return (n > 0) ? rs.getObject(n).toString() : rs.getObject(name).toString(); + return (n > 0) ? rs.getObject(n) : rs.getObject(name); } catch (SQLException se) { SetErrmsg(se); } //end try/catch diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp index 229ade53ad1..dca9bd0eac4 100644 --- a/storage/connect/jdbconn.cpp +++ b/storage/connect/jdbconn.cpp @@ -512,7 +512,7 @@ JDBConn::JDBConn(PGLOBAL g, TDBJDBC *tdbp) xqid = xuid = xid = grs = readid = fetchid = typid = errid = nullptr; prepid = xpid = pcid = nullptr; chrfldid = intfldid = dblfldid = fltfldid = bigfldid = nullptr; - datfldid = timfldid = tspfldid = nullptr; + objfldid = datfldid = timfldid = tspfldid = nullptr; //m_LoginTimeout = DEFAULT_LOGIN_TIMEOUT; //m_QueryTimeout = DEFAULT_QUERY_TIMEOUT; //m_UpdateOptions = 0; @@ -1167,9 +1167,10 @@ void JDBConn::Close() /***********************************************************************/ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) { - PGLOBAL& g = m_G; - jint ctyp; - jstring cn, jn = nullptr; + PGLOBAL& g = m_G; + jint ctyp; + jstring cn, jn = nullptr; + jobject jb = nullptr; if (rank == 0) if (!name || (jn = env->NewStringUTF(name)) == nullptr) { @@ -1185,21 
+1186,32 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) longjmp(g->jumper[g->jump_level], TYPE_AM_JDBC); } // endif Check + if (val->GetNullable()) + if (!gmID(g, objfldid, "ObjectField", "(ILjava/lang/String;)Ljava/lang/Object;")) { + jb = env->CallObjectMethod(job, objfldid, (jint)rank, jn); + + if (jb == nullptr) { + val->Reset(); + val->SetNull(true); + goto chk; + } // endif job + + } // endif objfldid + switch (ctyp) { case 12: // VARCHAR case -1: // LONGVARCHAR case 1: // CHAR - if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) { + if (jb) + cn = (jstring)jb; + else if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) cn = (jstring)env->CallObjectMethod(job, chrfldid, (jint)rank, jn); + else + cn = nullptr; - if (cn) { - const char *field = env->GetStringUTFChars(cn, (jboolean)false); - val->SetValue_psz((PSZ)field); - } else { - val->Reset(); - val->SetNull(true); - } // endif cn - + if (cn) { + const char *field = env->GetStringUTFChars(cn, (jboolean)false); + val->SetValue_psz((PSZ)field); } else val->Reset(); @@ -1271,6 +1283,7 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) val->Reset(); } // endswitch Type + chk: if (Check()) { if (rank == 0) env->DeleteLocalRef(jn); diff --git a/storage/connect/jdbconn.h b/storage/connect/jdbconn.h index 095b1565bd2..0a1c52d4576 100644 --- a/storage/connect/jdbconn.h +++ b/storage/connect/jdbconn.h @@ -165,6 +165,7 @@ protected: jmethodID xpid; // The ExecutePrep method ID jmethodID pcid; // The ClosePrepStmt method ID jmethodID errid; // The GetErrmsg method ID + jmethodID objfldid; // The ObjectField method ID jmethodID chrfldid; // The StringField method ID jmethodID intfldid; // The IntField method ID jmethodID dblfldid; // The DoubleField method ID From f6d4f82d6e49ed1ca2155c9e0e12f3dd8fcb1acf Mon Sep 17 00:00:00 2001 From: Elena Stepanova Date: Fri, 14 Oct 2016 23:23:16 +0300 Subject: [PATCH 66/96] MDEV-11061 Valgrind 
builder produces endless warnings after switching to OpenSS --- mysql-test/valgrind.supp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp index 723b609de8f..2dd10ff4008 100644 --- a/mysql-test/valgrind.supp +++ b/mysql-test/valgrind.supp @@ -1228,3 +1228,25 @@ fun:dlopen@@GLIBC_2.2.5 } +{ + MDEV-11061: OpenSSL 0.9.8 - Conditional jump or move + Memcheck:Cond + fun:BN_* + ... + fun:ssl3_ctx_ctrl + fun:new_VioSSLFd + fun:new_VioSSLAcceptorFd + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 - Use of uninitialised value + Memcheck:Value8 + fun:BN_* + ... + fun:ssl3_ctx_ctrl + fun:new_VioSSLFd + fun:new_VioSSLAcceptorFd + ... +} + From 8a49e00f3f1a81b6645ac3f2d843c9e5dd0375ba Mon Sep 17 00:00:00 2001 From: Elena Stepanova Date: Fri, 14 Oct 2016 23:23:49 +0300 Subject: [PATCH 67/96] More unstable tests --- mysql-test/unstable-tests | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mysql-test/unstable-tests b/mysql-test/unstable-tests index 6a46602eb07..2dbaeeebc0b 100644 --- a/mysql-test/unstable-tests +++ b/mysql-test/unstable-tests @@ -56,6 +56,7 @@ main.named_pipe : Modified on 2016-08-02 (MDEV-10383) main.openssl_1 : Modified on 2016-07-11 (MDEV-10211) main.parser : Modified on 2016-06-21 (merge) main.pool_of_threads : MDEV-10100 - sporadic error on detecting max connections +main.ps : MDEV-11017 - sporadic wrong Prepared_stmt_count main.ps_1general : Modified on 2016-07-12 (merge) main.range : Modified on 2016-08-10 (merge) main.range_mrr_icp : Modified on 2016-08-10 (merge) @@ -116,6 +117,7 @@ innodb.innodb_corrupt_bit : Modified on 2016-06-21 (merge) innodb.innodb_bug30423 : MDEV-7311 - Wrong number of rows in the plan innodb.innodb-fk-warnings : Modified on 2016-07-18 (MDEV-8569) innodb.innodb-fkcheck : Modified on 2016-06-13 (MDEV-10083) +innodb.innodb_monitor : MDEV-10939 - Testcase timeout innodb.innodb-wl5522 : rdiff file modified on 2016-08-10 (merge) innodb.innodb-wl5522-debug-zip : 
MDEV-10427 - Warning: database page corruption @@ -145,6 +147,7 @@ parts.partition_int_myisam : MDEV-10621 - Testcase timeout perfschema.digest_table_full : Modified on 2016-06-21 (merge) perfschema.func_file_io : MDEV-5708 - fails for s390x perfschema.func_mutex : MDEV-5708 - fails for s390x +perfschema.hostcache_ipv6_ssl : MDEV-10696 - crash on shutdown perfschema.rpl_gtid_func : Modified on 2016-06-21 (merge) perfschema.sizing_low : Modified on 2016-04-26 (5.6.30 merge) perfschema.socket_summary_by_event_name_func : MDEV-10622 - Socket summary tables do not match From 4192c468675220e0ad2de9eb722cfa457c0e5ced Mon Sep 17 00:00:00 2001 From: Elena Stepanova Date: Sun, 16 Oct 2016 04:46:39 +0300 Subject: [PATCH 68/96] MDEV-11061 Valgrind builder produces endless warnings OpenSSL problems, part II --- mysql-test/valgrind.supp | 117 +++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp index 2dd10ff4008..77f17cf07ec 100644 --- a/mysql-test/valgrind.supp +++ b/mysql-test/valgrind.supp @@ -1228,25 +1228,122 @@ fun:dlopen@@GLIBC_2.2.5 } +# +# MDEV-11061: OpenSSL 0.9.8 problems +# + { - MDEV-11061: OpenSSL 0.9.8 - Conditional jump or move + MDEV-11061: OpenSSL 0.9.8 Memcheck:Cond - fun:BN_* + obj:*/libz.so* ... - fun:ssl3_ctx_ctrl - fun:new_VioSSLFd - fun:new_VioSSLAcceptorFd + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 ... } { - MDEV-11061: OpenSSL 0.9.8 - Use of uninitialised value + MDEV-11061: OpenSSL 0.9.8 Memcheck:Value8 - fun:BN_* + obj:*/libz.so* ... - fun:ssl3_ctx_ctrl - fun:new_VioSSLFd - fun:new_VioSSLAcceptorFd + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... 
+} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + obj:*/libssl.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + obj:*/libssl.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:is_overlap + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:memset + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + fun:memset + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Param + write(buf) + obj:*/libpthread-2.9.so* + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 ... 
} From df87be5edafb402e36e9c16aa0f00b1d5104d920 Mon Sep 17 00:00:00 2001 From: Elena Stepanova Date: Mon, 17 Oct 2016 14:04:45 +0300 Subject: [PATCH 69/96] MDEV-11069 main.information_schema test fails if hostname includes 'user' Patch provided by Honza Horak --- mysql-test/r/information_schema.result | 8 ++++---- mysql-test/t/information_schema.test | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index d98f8168e9e..1f765a70137 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -986,19 +986,19 @@ show grants; Grants for user3@localhost GRANT USAGE ON *.* TO 'user3'@'localhost' GRANT SELECT ON `mysqltest`.* TO 'user3'@'localhost' -select * from information_schema.column_privileges where grantee like '%user%' +select * from information_schema.column_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME COLUMN_NAME PRIVILEGE_TYPE IS_GRANTABLE 'user1'@'localhost' def mysqltest t1 f1 SELECT NO -select * from information_schema.table_privileges where grantee like '%user%' +select * from information_schema.table_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME PRIVILEGE_TYPE IS_GRANTABLE 'user2'@'localhost' def mysqltest t2 SELECT NO -select * from information_schema.schema_privileges where grantee like '%user%' +select * from information_schema.schema_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA PRIVILEGE_TYPE IS_GRANTABLE 'user3'@'localhost' def mysqltest SELECT NO -select * from information_schema.user_privileges where grantee like '%user%' +select * from information_schema.user_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG PRIVILEGE_TYPE IS_GRANTABLE 'user1'@'localhost' def USAGE NO diff --git a/mysql-test/t/information_schema.test 
b/mysql-test/t/information_schema.test index fb39f2e5d58..943eb8bab8a 100644 --- a/mysql-test/t/information_schema.test +++ b/mysql-test/t/information_schema.test @@ -612,13 +612,13 @@ select * from information_schema.schema_privileges order by grantee; select * from information_schema.user_privileges order by grantee; show grants; connection con4; -select * from information_schema.column_privileges where grantee like '%user%' +select * from information_schema.column_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.table_privileges where grantee like '%user%' +select * from information_schema.table_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.schema_privileges where grantee like '%user%' +select * from information_schema.schema_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.user_privileges where grantee like '%user%' +select * from information_schema.user_privileges where grantee like '\'user%' order by grantee; show grants; connection default; From 6e257274d98843b228e5bd08da74031f6f3a202d Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 17 Oct 2016 11:43:47 -0400 Subject: [PATCH 70/96] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index d44c8b28006..4f1ecb3a197 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=53 +MYSQL_VERSION_PATCH=54 MYSQL_VERSION_EXTRA= From 4dfb6a3f54cfb26535636197cc5fa70fe5bacc2e Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 28 Sep 2016 14:16:38 +0000 Subject: [PATCH 71/96] MDEV-11083 performance schema test fail with threadpool Fix PSI idle and socket instrumentation in threadpool --- sql/threadpool_common.cc | 99 ++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/sql/threadpool_common.cc 
b/sql/threadpool_common.cc index 5bcea767aae..9d263038bc9 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -73,17 +73,16 @@ struct Worker_thread_context void save() { -#ifdef HAVE_PSI_INTERFACE - psi_thread= PSI_server?PSI_server->get_thread():0; +#ifdef HAVE_PSI_THREAD_INTERFACE + psi_thread = PSI_THREAD_CALL(get_thread)(); #endif mysys_var= (st_my_thread_var *)pthread_getspecific(THR_KEY_mysys); } void restore() { -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - PSI_server->set_thread(psi_thread); +#ifdef HAVE_PSI_THREAD_INTERFACE + PSI_THREAD_CALL(set_thread)(psi_thread); #endif pthread_setspecific(THR_KEY_mysys,mysys_var); pthread_setspecific(THR_THD, 0); @@ -92,6 +91,41 @@ struct Worker_thread_context }; + +#ifdef HAVE_PSI_INTERFACE + +/* + The following fixes PSI "idle" psi instrumentation. + The server assumes that connection becomes idle + just before net_read_packet() and switches to active after it. + In our setup, server becomes idle when async socket io is made.
+*/ + +extern void net_before_header_psi(struct st_net *net, void *user_data, size_t); + +static void dummy_before_header(struct st_net *, void *, size_t) +{ +} + +static void re_init_net_server_extension(THD *thd) +{ + thd->m_net_server_extension.m_before_header = dummy_before_header; +} + +#else + +#define re_init_net_server_extension(thd) + +#endif /* HAVE_PSI_INTERFACE */ + + +static inline void set_thd_idle(THD *thd) +{ + thd->net.reading_or_writing= 1; +#ifdef HAVE_PSI_INTERFACE + net_before_header_psi(&thd->net, thd, 0); +#endif +} + /* Attach/associate the connection with the OS thread, */ @@ -100,10 +134,10 @@ static bool thread_attach(THD* thd) pthread_setspecific(THR_KEY_mysys,thd->mysys_var); thd->thread_stack=(char*)&thd; thd->store_globals(); -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - PSI_server->set_thread(thd->event_scheduler.m_psi); +#ifdef HAVE_PSI_THREAD_INTERFACE + PSI_THREAD_CALL(set_thread)(thd->event_scheduler.m_psi); #endif + mysql_socket_set_thread_owner(thd->net.vio->mysql_socket); return 0; } @@ -130,40 +164,38 @@ int threadpool_add_connection(THD *thd) } /* Create new PSI thread for use with the THD. */ -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - { - thd->event_scheduler.m_psi = - PSI_server->new_thread(key_thread_one_connection, thd, thd->thread_id); - } +#ifdef HAVE_PSI_THREAD_INTERFACE + thd->event_scheduler.m_psi= + PSI_THREAD_CALL(new_thread)(key_thread_one_connection, thd, thd->thread_id); #endif /* Login. */ thread_attach(thd); + re_init_net_server_extension(thd); ulonglong now= microsecond_interval_timer(); thd->prior_thr_create_utime= now; thd->start_utime= now; thd->thr_create_utime= now; - if (!setup_connection_thread_globals(thd)) - { - if (!login_connection(thd)) - { - prepare_new_connection_state(thd); - - /* - Check if THD is ok, as prepare_new_connection_state() - can fail, for example if init command failed. 
- */ - if (thd_is_connection_alive(thd)) - { - retval= 0; - thd->net.reading_or_writing= 1; - thd->skip_wait_timeout= true; - } - } - } + if (setup_connection_thread_globals(thd)) + goto end; + + if (thd_prepare_connection(thd)) + goto end; + + /* + Check if THD is ok, as prepare_new_connection_state() + can fail, for example if init command failed. + */ + if (!thd_is_connection_alive(thd)) + goto end; + + retval= 0; + thd->skip_wait_timeout= true; + set_thd_idle(thd); + +end: worker_context.restore(); return retval; } @@ -245,12 +277,13 @@ int threadpool_process_request(THD *thd) goto end; } + set_thd_idle(thd); + vio= thd->net.vio; if (!vio->has_data(vio)) { /* More info on this debug sync is in sql_parse.cc*/ DEBUG_SYNC(thd, "before_do_command_net_read"); - thd->net.reading_or_writing= 1; goto end; } } From 998f987eda62e6b3481ac3914538282715e2df4a Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Fri, 21 Oct 2016 22:37:51 +0200 Subject: [PATCH 72/96] Upstream MIPS test fixes from Debian Bug 838557. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=838557 MIPS has a different errno for "directory not empty". --- mysql-test/extra/binlog_tests/database.test | 2 +- mysql-test/suite/rpl/t/rpl_drop_db.test | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test index f111a028642..17f8e069fa3 100644 --- a/mysql-test/extra/binlog_tests/database.test +++ b/mysql-test/extra/binlog_tests/database.test @@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix'; # Use '/' instead of '\' in the error message. On windows platform, dir is # formed with '\'. 
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/ +--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /93/39/ /17/39/ /247/39/ /File exists/Directory not empty/ --error 1010 DROP DATABASE testing_1; let $wait_binlog_event= DROP TABLE IF EXIST; diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test index dae1651dc93..f66187b12f5 100644 --- a/mysql-test/suite/rpl/t/rpl_drop_db.test +++ b/mysql-test/suite/rpl/t/rpl_drop_db.test @@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1); select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt'; create table mysqltest1.t2 (n int); create table mysqltest1.t3 (n int); ---replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; @@ -30,7 +30,7 @@ while ($1) } --enable_query_log ---replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; From 7eb4bd3f1ddd9b84425d51550b44c14ac0a8f1de Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Fri, 21 Oct 2016 22:43:46 +0200 Subject: [PATCH 73/96] Upstream patch from Debian Bug 838557 The patch fixes 128-bit multiply on mips64. This corrects a previous incorrect patch upstreamed from Debian. 
--- extra/yassl/taocrypt/src/integer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extra/yassl/taocrypt/src/integer.cpp b/extra/yassl/taocrypt/src/integer.cpp index fb8d9276bd9..dd8425396ed 100644 --- a/extra/yassl/taocrypt/src/integer.cpp +++ b/extra/yassl/taocrypt/src/integer.cpp @@ -193,8 +193,9 @@ DWord() {} "a" (a), "rm" (b) : "cc"); #elif defined(__mips64) - __asm__("dmultu %2,%3" : "=d" (r.halfs_.high), "=l" (r.halfs_.low) - : "r" (a), "r" (b)); + unsigned __int128 t = (unsigned __int128) a * b; + r.halfs_.high = t >> 64; + r.halfs_.low = (word) t; #elif defined(_M_IX86) // for testing From 39b7affcb13f9f508242e90ecd5db03b3bb3cb85 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Fri, 21 Oct 2016 23:02:56 +0200 Subject: [PATCH 74/96] Upstream MIPS 32bit-build-on-64bit patch from Debian Bug#838914 From https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=838914 Fixes CMake so that when building a 32-bit mips binary on a 64-bit mips machine, the target is not set as 32-bit, which apparently confused some tests in mroonga. --- cmake/package_name.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/package_name.cmake b/cmake/package_name.cmake index 4ba8fc18e3f..48ca3a4814d 100644 --- a/cmake/package_name.cmake +++ b/cmake/package_name.cmake @@ -30,6 +30,10 @@ IF(NOT VERSION) SET(64BIT 1) ENDIF() + IF(NOT 64BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^mips64") + SET(DEFAULT_MACHINE "mips") + ENDIF() + IF(CMAKE_SYSTEM_NAME MATCHES "Windows") SET(NEED_DASH_BETWEEN_PLATFORM_AND_MACHINE 0) SET(DEFAULT_PLATFORM "win") From fb38d2642011c574cc9103ae1a1f9dd77f7f027e Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sat, 22 Oct 2016 07:34:23 +0000 Subject: [PATCH 75/96] MDEV-11104 Fix client to correctly retrieve current user name on Windows Prior to this patch name of the user was read from environment variable USER, with a fallback to 'ODBC', if the environment variable is not set. 
The name of the env.variable is incorrect (USERNAME usually contains current user's name, but not USER), which made the client always determine the current user as 'ODBC'. The fix is to use GetUserName() instead. --- libmysql/libmysql.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c index 446f1da0b0c..3a08ea26b1d 100644 --- a/libmysql/libmysql.c +++ b/libmysql/libmysql.c @@ -450,8 +450,9 @@ void read_user_name(char *name) void read_user_name(char *name) { - char *str=getenv("USER"); /* ODBC will send user variable */ - strmake(name,str ? str : "ODBC", USERNAME_LENGTH); + DWORD len= USERNAME_LENGTH; + if (!GetUserName(name, &len)) + strmov(name,"UNKNOWN_USER"); } #endif From 0c925aa9356ee9d31283510c2420d1b5f21f5c9c Mon Sep 17 00:00:00 2001 From: Elena Stepanova Date: Sun, 23 Oct 2016 18:47:44 +0300 Subject: [PATCH 76/96] MDEV-11097 - Update the list of unstable tests --- mysql-test/unstable-tests | 89 ++++++++++++++++----------------- 1 file changed, 36 insertions(+), 53 deletions(-) diff --git a/mysql-test/unstable-tests b/mysql-test/unstable-tests index 2dbaeeebc0b..3e25115599f 100644 --- a/mysql-test/unstable-tests +++ b/mysql-test/unstable-tests @@ -23,78 +23,66 @@ # ############################################################################## -main.bootstrap : Modified on 2016-06-18 (MDEV-9969) main.create_delayed : MDEV-10605 - failed with timeout -main.create_or_replace : Modified on 2016-06-23 (MDEV-9728) -main.ctype_recoding : Modified on 2016-06-10 (MDEV-10181) -main.ctype_utf8 : Modified on 2016-06-21 (merge) -main.ctype_utf8mb4 : Modified on 2016-06-21 (merge) -main.events_1 : Modified on 2016-06-21 (MDEV-9524) +main.ctype_utf32 : Modified on 2016-09-27 (merge) main.func_group : Modified on 2016-08-08 (MDEV-10468) -main.func_in : Modified on 2016-06-20 (MDEV-10020) main.func_math : Modified on 2016-08-10 (merge) main.func_misc : Modified on 2016-08-10 (merge) -main.grant2 : Modified on
2016-07-18 (MDEV-8569) -main.help : Modified on 2016-06-21 (MDEV-9524) +main.group_min_max_innodb : Modified on 2016-08-25 (MDEV-10595) main.host_cache_size_functionality : MDEV-10606 - sporadic failure on shutdown main.index_intersect_innodb : MDEV-10643 - failed with timeout -main.index_merge_innodb : MDEV-7142 - sporadic wrong execution plan +main.index_merge_myisam : Modified on 2016-09-05 (include file changed) +main.index_merge_innodb : Modified on 2016-09-05 (MDEV-7142) main.information_schema_stats : Modified on 2016-07-25 (MDEV-10428) main.innodb_mysql_lock : MDEV-7861 - sporadic lock detection failure -main.insert_innodb : Modified on 2016-06-14 (merge from upstream) main.loaddata : Modified on 2016-08-10 (merge) -main.locale : Modified on 2016-06-21 (merge) main.mdev-504 : MDEV-10607 - sporadic "can't connect" main.mdev375 : MDEV-10607 - sporadic "can't connect" main.merge : MDEV-10607 - sporadic "can't connect" -main.multi_update : Modified on 2016-06-20 (MDEV-5973) main.myisam_enable_keys-10506 : New test, added on 2016-08-10 (MDEV-10506) main.mysqlcheck : Modified on 2016-08-10 (merge) main.mysqldump : MDEV-10512 - sporadic assertion failure +main.mysqlhotcopy_myisam : MDEV-10995 - test hangs on debug build main.mysqltest : MDEV-9269 - fails on Alpha main.named_pipe : Modified on 2016-08-02 (MDEV-10383) -main.openssl_1 : Modified on 2016-07-11 (MDEV-10211) -main.parser : Modified on 2016-06-21 (merge) main.pool_of_threads : MDEV-10100 - sporadic error on detecting max connections main.ps : MDEV-11017 - sporadic wrong Prepared_stmt_count -main.ps_1general : Modified on 2016-07-12 (merge) main.range : Modified on 2016-08-10 (merge) main.range_mrr_icp : Modified on 2016-08-10 (merge) main.query_cache : MDEV-10611 - sporadic mutex problem -main.shutdown : MDEV-10612 - sporadic crashes +main.shutdown : MDEV-10563 - sporadic crashes main.sp-prelocking : Modified on 2016-08-10 (merge) main.sp-security : MDEV-10607 - sporadic "can't connect" -main.ssl : 
MDEV-10211 - different ciphers on some platforms -main.ssl_ca : Modified on 2016-07-11 (MDEV-10211) -main.ssl_compress : Modified on 2016-07-11 (MDEV-10211) -main.ssl_timeout : Modified on 2016-07-11 (MDEV-10211) +main.ssl_compress : MDEV-11110 - valgrind failures main.stat_tables_par_innodb : MDEV-10515 - sporadic wrong results -main.status_user : Modified on 2016-06-20 (MDEV-8633) main.subselect_innodb : MDEV-10614 - sporadic wrong results -main.temp_table : Modified on 2016-06-18 (MDEV-8569) main.type_date : Modified on 2016-08-10 (merge) -main.type_datetime : Modified on 2016-06-16 (MDEV-9374) +main.type_uint : Modified on 2016-09-27 (merge) main.view : Modified on 2016-08-10 (merge) main.xtradb_mrr : Modified on 2016-08-04 (MDEV-9946) #---------------------------------------------------------------- -archive.archive-big : MDEV-10615 - table is marked as crashed -archive.discover : MDEV-10510 - table is marked as crashed +archive.archive-big : MDEV-10615 - table is marked as crashed +archive.discover : MDEV-10510 - table is marked as crashed +archive.mysqlhotcopy_archive : MDEV-10995 - test hangs on debug build #---------------------------------------------------------------- binlog.binlog_commit_wait : MDEV-10150 - Error: too much time elapsed -binlog.binlog_dmls_on_tmp_tables_readonly : New test, added on 2016-05-04 (upstream) binlog.binlog_xa_recover : MDEV-8517 - Extra checkpoint #---------------------------------------------------------------- connect.tbl : MDEV-9844, MDEV-10179 - sporadic crashes, valgrind warnings, wrong results -connect.jdbc : New test, added on 2016-07-15 -connect.jdbc-new : New test, added on 2016-07-14 -connect.jdbc-oracle : New test, added on 2016-07-13 -connect.jdbc-postgresql : New test, added on 2016-07-13 + +#---------------------------------------------------------------- + +engines/rr_trx.* : MDEV-10998 - tests not maintained + +#---------------------------------------------------------------- + +extra/binlog_tests.database : 
Modified on 2016-10-21 (Upstream MIPS test fixes) #---------------------------------------------------------------- @@ -105,21 +93,19 @@ federated.federated_transactions : MDEV-10617, MDEV-10417 - Wrong checksum, time #---------------------------------------------------------------- -funcs_1.processlist_priv_no_prot : Include file modified on 2016-07-12 (merge) -funcs_1.processlist_priv_ps : Include file modified on 2016-07-12 (merge) +funcs_2/charset.* : MDEV-10999 - test not maintained #---------------------------------------------------------------- innodb.binlog_consistent : MDEV-10618 - Server fails to start innodb.innodb-alter-table : MDEV-10619 - Testcase timeout innodb.innodb-alter-tempfile : Modified on 2016-08-09 (MDEV-10469) -innodb.innodb_corrupt_bit : Modified on 2016-06-21 (merge) innodb.innodb_bug30423 : MDEV-7311 - Wrong number of rows in the plan -innodb.innodb-fk-warnings : Modified on 2016-07-18 (MDEV-8569) -innodb.innodb-fkcheck : Modified on 2016-06-13 (MDEV-10083) +innodb.innodb_bug54044 : Modified on 2016-09-27 (merge) innodb.innodb_monitor : MDEV-10939 - Testcase timeout innodb.innodb-wl5522 : rdiff file modified on 2016-08-10 (merge) innodb.innodb-wl5522-debug-zip : MDEV-10427 - Warning: database page corruption +innodb.system_tables : Added on 2016-09-23 (MDEV-10775) #---------------------------------------------------------------- @@ -144,22 +130,16 @@ parts.partition_int_myisam : MDEV-10621 - Testcase timeout #---------------------------------------------------------------- -perfschema.digest_table_full : Modified on 2016-06-21 (merge) perfschema.func_file_io : MDEV-5708 - fails for s390x perfschema.func_mutex : MDEV-5708 - fails for s390x perfschema.hostcache_ipv6_ssl : MDEV-10696 - crash on shutdown -perfschema.rpl_gtid_func : Modified on 2016-06-21 (merge) -perfschema.sizing_low : Modified on 2016-04-26 (5.6.30 merge) perfschema.socket_summary_by_event_name_func : MDEV-10622 - Socket summary tables do not match 
-perfschema.start_server_low_digest : Modified on 2016-06-21 (merge) -perfschema.statement_digest : Modified on 2016-06-21 (merge) -perfschema.statement_digest_consumers : Modified on 2016-06-21 (merge) -perfschema.statement_digest_long_query : Modified on 2016-06-21 (merge) -perfschema.table_name : New test, added on 2016-04-26 (5.6.30 merge) + +perfschema_stress.* : MDEV-10996 - tests not maintained #---------------------------------------------------------------- -plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url +plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url, MDEV-11112 - valgrind warnings plugins.pam : Modified on 2016-08-03 (MDEV-7329) plugins.pam_cleartext : Modified on 2016-08-03 plugins.server_audit : MDEV-9562 - crashes on sol10-sparc @@ -167,11 +147,6 @@ plugins.thread_pool_server_audit : MDEV-9562 - crashes on sol10-sparc #---------------------------------------------------------------- -roles.rpl_grant_revoke_current_role-8638 : New test, added on 2016-06-20 (MDEV-8638) -roles.set_role-9614 : New test, added on 2016-05-30 (MDEV-9614) - -#---------------------------------------------------------------- - rpl.last_insert_id : MDEV-10625 - warnings in error log rpl.rpl_auto_increment : MDEV-10417 - Fails on Mips rpl.rpl_auto_increment_bug45679 : MDEV-10417 - Fails on Mips @@ -180,11 +155,11 @@ rpl.rpl_binlog_index : MDEV-9501 - Warning: failed registering rpl.rpl_checksum_cache : MDEV-10626 - Testcase timeout rpl.rpl_circular_for_4_hosts : MDEV-10627 - Testcase timeout rpl.rpl_ddl : MDEV-10417 - Fails on Mips +rpl.rpl_drop_db : Modified on 2016-10-21 (Upstream MIPS test fixes) rpl.rpl_gtid_crash : MDEV-9501 - Warning: failed registering on master rpl.rpl_gtid_master_promote : MDEV-10628 - Timeout in sync_with_master rpl.rpl_gtid_stop_start : MDEV-10629 - Crash on shutdown rpl.rpl_gtid_until : MDEV-10625 - warnings in error log -rpl.rpl_ignore_table : Modified on 2016-06-22 rpl.rpl_innodb_bug30888 : MDEV-10417 - Fails on Mips 
rpl.rpl_insert : MDEV-9329 - Fails on Ubuntu/s390x rpl.rpl_insert_delayed : MDEV-9329 - Fails on Ubuntu/s390x @@ -204,6 +179,8 @@ rpl.rpl_temporary_error2 : MDEV-10634 - Wrong number of retries rpl.sec_behind_master-5114 : MDEV-8518 - Wrong value of Seconds_Behind_Master rpl.rpl_skip_replication : MDEV-9268 - Fails with timeout in sync_slave_with_master on Alpha +rpl/extra/rpl_tests.* : MDEV-10994 - tests not maintained + #---------------------------------------------------------------- spider.* : MDEV-9329 - tests are too memory-consuming @@ -217,6 +194,10 @@ spider/bg.vp_fixes : MDEV-9329 - Fails on Ubuntu/s390x #---------------------------------------------------------------- +sphinx.* : MDEV-10747 - tests are not run in buildbot, they can't be stable + +#---------------------------------------------------------------- + stress.ddl_innodb : MDEV-10635 - Testcase timeout #---------------------------------------------------------------- @@ -232,11 +213,14 @@ tokudb.background_job_manager : MDEV-10327 - Assertion failure on server tokudb.cluster_filter_unpack_varchar : MDEV-10636 - Wrong execution plan tokudb.* : MDEV-9891 - massive crashes on shutdown tokudb_alter_table.* : MDEV-9891 - massive crashes on shutdown +tokudb_backup.* : MDEV-11001 - tests don't work tokudb_bugs.checkpoint_lock : MDEV-10637 - Wrong processlist output tokudb_bugs.checkpoint_lock_3 : MDEV-10637 - Wrong processlist output tokudb_bugs.* : MDEV-9891 - massive crashes on shutdown tokudb_parts.* : MDEV-9891 - massive crashes on shutdown -rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown, also modified on 2016-06-10 (Merge) +tokudb_rpl_suites.* : MDEV-11001 - tests don't work +tokudb_sys_vars.* : MDEV-11001 - tests don't work +rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_add_index.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_backup.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_mariadb.* : MDEV-9891 - massive crashes on shutdown @@ -250,7 
+234,6 @@ unit.ma_test_loghandler : MDEV-10638 - record read not ok #---------------------------------------------------------------- -vcol.charsets : Added on 2016-06-23 vcol.not_supported : MDEV-10639 - Testcase timeout vcol.vcol_keys_innodb : MDEV-10639 - Testcase timeout From 3321f1adc74b54e7534000c06eeca166730ccc4a Mon Sep 17 00:00:00 2001 From: Don Lewis Date: Tue, 21 Jun 2016 13:35:59 +1000 Subject: [PATCH 77/96] MDEV-5944: Compile fix for OQGRAPH with LLVM Clang/LLVM has more strict semantics than gcc. This patch qualifies the namespace such that it will compile using clang. --- storage/oqgraph/graphcore.cc | 2 +- storage/oqgraph/oqgraph_shim.h | 48 +++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/storage/oqgraph/graphcore.cc b/storage/oqgraph/graphcore.cc index 4346b94805c..7c8ca53c096 100644 --- a/storage/oqgraph/graphcore.cc +++ b/storage/oqgraph/graphcore.cc @@ -485,7 +485,7 @@ namespace open_query optional oqgraph_share::find_vertex(VertexID id) const { - return ::boost::find_vertex(id, g); + return oqgraph3::find_vertex(id, g); } #if 0 diff --git a/storage/oqgraph/oqgraph_shim.h b/storage/oqgraph/oqgraph_shim.h index af240b88ebd..004d7f0f7c5 100644 --- a/storage/oqgraph/oqgraph_shim.h +++ b/storage/oqgraph/oqgraph_shim.h @@ -274,6 +274,33 @@ namespace boost }; #endif + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::edge_weight_property_map const_type; + }; + + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::vertex_index_property_map const_type; + }; + + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::edge_index_property_map const_type; + }; + +} + +namespace oqgraph3 +{ + using namespace boost; + inline graph_traits::vertex_descriptor source( const graph_traits::edge_descriptor& e, @@ -401,27 +428,6 @@ namespace boost return count; } - template<> - struct property_map - { - typedef void type; - typedef
oqgraph3::edge_weight_property_map const_type; - }; - - template<> - struct property_map - { - typedef void type; - typedef oqgraph3::vertex_index_property_map const_type; - }; - - template<> - struct property_map - { - typedef void type; - typedef oqgraph3::edge_index_property_map const_type; - }; - inline property_map< oqgraph3::graph, edge_weight_t>::const_type::reference From ba11dd69fee7b82edf4e6afbb13e3fa94cd885ca Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Tue, 25 Oct 2016 12:21:53 +0000 Subject: [PATCH 78/96] MDEV-11127 : Fix innochecksum to work with large files on Windows. - don't use stat() for file size, it does not handle large sizes; use GetFileSizeEx() instead - don't use lseek(), it can't handle large files, use _lseeki64() instead. - Also, switch off OS file buffering for innochecksum on Windows, to avoid thrashing file cache. --- extra/innochecksum.cc | 55 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index 6018a4884ea..97d47b4563a 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -243,10 +243,9 @@ int main(int argc, char **argv) time_t lastt; /* last time */ ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; /* ulints for checksum storage */ - struct stat st; /* for stat, if you couldn't guess */ unsigned long long int size; /* size of file (has to be 64 bits) */ ulint pages; /* number of pages in file */ - off_t offset= 0; + unsigned long long offset= 0; int fd; printf("InnoDB offline file checksum utility.\n"); @@ -269,6 +268,47 @@ int main(int argc, char **argv) goto error; } +#ifdef _WIN32 + /* Switch off OS file buffering for the file.
*/ + + HANDLE h = CreateFile(filename, GENERIC_READ, + FILE_SHARE_READ|FILE_SHARE_WRITE, 0, + OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, 0); + + if (!h) + { + fprintf(stderr, "Error; cant open file\n"); + goto error; + } + + if (!GetFileSizeEx(h, (LARGE_INTEGER *)&size)) + { + fprintf(stderr, "Error; GetFileSize() failed\n"); + goto error; + } + + fd = _open_osfhandle ((intptr_t) h, _O_RDONLY); + if (fd < 0) + { + fprintf(stderr, "Error; _open_osfhandle() failed\n"); + goto error; + } + + f = _fdopen(fd, "rb"); + if (!f) + { + fprintf(stderr, "Error; fdopen() failed\n"); + goto error; + } + + /* + Disable stdio buffering (FILE_FLAG_NO_BUFFERING requires properly IO buffers + which stdio does not guarantee. + */ + setvbuf(f, NULL, _IONBF, 0); + +#else + struct stat st; /* stat the file to get size and page count */ if (stat(filename, &st)) { @@ -279,6 +319,8 @@ int main(int argc, char **argv) /* Open the file for reading */ f= fopen(filename, "rb"); +#endif + if (f == NULL) { fprintf(stderr, "Error; %s cannot be opened", filename); @@ -323,7 +365,7 @@ int main(int argc, char **argv) } else if (verbose) { - printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); + printf("file %s = %llu bytes (%lu pages)...\n", filename, size, (ulong)pages); if (do_one_page) printf("InnoChecksum; checking page %lu\n", do_page); else @@ -348,9 +390,12 @@ int main(int argc, char **argv) goto error; } - offset= (off_t)start_page * (off_t)physical_page_size; - + offset= (ulonglong)start_page * (ulonglong)physical_page_size; +#ifdef _WIN32 + if (_lseeki64(fd, offset, SEEK_SET) != offset) +#else if (lseek(fd, offset, SEEK_SET) != offset) +#endif { perror("Error; Unable to seek to necessary offset"); goto error; From 39dceaae607e2c9f53146d5b23f8dee330643cb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Sun, 9 Oct 2016 12:09:44 +0200 Subject: [PATCH 79/96] MDEV-10983: TokuDB does not compile on OS X 10.12 Make use of a different function to get the 
current tid. Additionally, librt doesn't exist on OS X. Use System library instead. --- .../PerconaFT/cmake_modules/TokuFeatureDetection.cmake | 4 +++- storage/tokudb/PerconaFT/portability/portability.cc | 9 ++++++++- storage/tokudb/PerconaFT/portability/tests/test-xid.cc | 9 ++++++++- storage/tokudb/PerconaFT/portability/toku_config.h.in | 1 + 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake index 4c5004cd6a5..883f35041e2 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake @@ -97,7 +97,7 @@ if (NOT HAVE_BACKTRACE_WITHOUT_EXECINFO) endif () endif () -if(HAVE_CLOCK_REALTIME) +if(HAVE_CLOCK_REALTIME AND (NOT APPLE)) list(APPEND EXTRA_SYSTEM_LIBS rt) else() list(APPEND EXTRA_SYSTEM_LIBS System) @@ -109,6 +109,8 @@ check_function_exists(pthread_rwlockattr_setkind_np HAVE_PTHREAD_RWLOCKATTR_SETK ## check for the right way to yield using pthreads check_function_exists(pthread_yield HAVE_PTHREAD_YIELD) check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP) +## check if we have pthread_threadid_np() (i.e. osx) +check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP) ## check if we have pthread_getthreadid_np() (i.e. freebsd) check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP) check_function_exists(sched_getcpu HAVE_SCHED_GETCPU) diff --git a/storage/tokudb/PerconaFT/portability/portability.cc b/storage/tokudb/PerconaFT/portability/portability.cc index ba9f8d48ed5..19f445a85d7 100644 --- a/storage/tokudb/PerconaFT/portability/portability.cc +++ b/storage/tokudb/PerconaFT/portability/portability.cc @@ -63,6 +63,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#if defined(HAVE_SYS_SYSCTL_H) # include #endif +#if defined(HAVE_PTHREAD_H) +# include +#endif #if defined(HAVE_PTHREAD_NP_H) # include #endif @@ -102,7 +105,11 @@ toku_os_getpid(void) { int toku_os_gettid(void) { -#if defined(__NR_gettid) +#if defined(HAVE_PTHREAD_THREADID_NP) + uint64_t result; + pthread_threadid_np(NULL, &result); + return (int) result; // Used for instrumentation so overflow is ok here. +#elif defined(__NR_gettid) return syscall(__NR_gettid); #elif defined(SYS_gettid) return syscall(SYS_gettid); diff --git a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc index 9ee68906bb3..71736f898ef 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc @@ -51,11 +51,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #if defined(HAVE_PTHREAD_NP_H) # include #endif +#if defined(HAVE_PTHREAD_H) +# include +#endif // since we implement the same thing here as in toku_os_gettid, this test // is pretty pointless static int gettid(void) { -#if defined(__NR_gettid) +#if defined(HAVE_PTHREAD_THREADID_NP) + uint64_t result; + pthread_threadid_np(NULL, &result); + return (int) result; +#elif defined(__NR_gettid) return syscall(__NR_gettid); #elif defined(SYS_gettid) return syscall(SYS_gettid); diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in index 1a34bf1ef45..18f6779796f 100644 --- a/storage/tokudb/PerconaFT/portability/toku_config.h.in +++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in @@ -87,6 +87,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#cmakedefine HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP 1 #cmakedefine HAVE_PTHREAD_YIELD 1 #cmakedefine HAVE_PTHREAD_YIELD_NP 1 +#cmakedefine HAVE_PTHREAD_THREADID_NP 1 #cmakedefine HAVE_PTHREAD_GETTHREADID_NP 1 #cmakedefine PTHREAD_YIELD_RETURNS_INT 1 From 1daf746e31e38a3ec1cdcb9427153b65f744dcda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 25 Oct 2016 16:34:22 +0300 Subject: [PATCH 80/96] Add tokuftdump man page The man page was already present in the debian release of MariaDB 10.0. --- man/CMakeLists.txt | 2 +- tokuftdump.1 | 237 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 tokuftdump.1 diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt index c4383b31a17..c6163fee537 100644 --- a/man/CMakeLists.txt +++ b/man/CMakeLists.txt @@ -22,7 +22,7 @@ SET(MAN1_SERVER innochecksum.1 my_print_defaults.1 myisam_ftdump.1 myisamchk.1 mysql_tzinfo_to_sql.1 mysql_upgrade.1 mysql_zap.1 mysqld_multi.1 mysqld_safe.1 mysqldumpslow.1 mysqlhotcopy.1 mysqltest.1 perror.1 replace.1 resolve_stack_dump.1 - resolveip.1 mysqlbug.1) + resolveip.1 mysqlbug.1 tokuftdump.1) SET(MAN8_SERVER mysqld.8) SET(MAN1_CLIENT msql2mysql.1 mysql.1 mysql_find_rows.1 mysql_waitpid.1 mysqlaccess.1 mysqladmin.1 mysqlbinlog.1 mysqlcheck.1 diff --git a/tokuftdump.1 b/tokuftdump.1 new file mode 100644 index 00000000000..3d9faae30ca --- /dev/null +++ b/tokuftdump.1 @@ -0,0 +1,237 @@ +'\" t +.\" +.TH "\FBTOKUFTDUMP\FR" "1" "04/07/2016" "MariaDB 10\&.0" "MariaDB Database System" +.\" ----------------------------------------------------------------- +.\" * set default formatting +.\" ----------------------------------------------------------------- +.\" disable hyphenation +.nh +.\" disable justification (adjust text to left margin only) +.ad l +.\" ----------------------------------------------------------------- +.\" * MAIN CONTENT STARTS HERE * +.\" ----------------------------------------------------------------- 
+.\" tokuftdump +.\" upgrading MySQL +.SH "NAME" +tokuftdump \- look into the fractal tree file +.SH "SYNOPSIS" +.HP \w'\fBtokuftdump\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u +\fBtokuftdump [\fR\fB\fIoptions\fR\fR\fB]\fR +.SH "DESCRIPTION" +.PP +\fBtokuftdump\fR +Investigates and diagnoses the fractal tree\&. +.PP +\fBtokuftdump\fR +supports the following options for processing option files\&. +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: interactive option +.\" interactive option: tokuftdump +\fB\-\-interactive\fR +.sp +Interactive\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: support option +.\" support option: tokuftdump +\fB\-\-support \fI/path/to/fractal-tree/file\fR +.sp +An interactive way to see what messages and/or switch between FTs\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: json option +.\" json option: tokuftdump +\fB\-\-json \fI/path/to/fractal-tree/file [output_json_file]\fR +.sp +If the output json file is left empty, FT\&.json will be created automatically\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: nodata option +.\" nodata option: tokuftdump +\fB\-\-nodata\fR +.sp +Nodata\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: dumpdata option +.\" dumpdata option: tokuftdump +\fB\-\-dumpdata = \fR\fB\fI0|1\fR\fR +.sp +Dumpdata\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: header option +.\" header option: tokuftdump +\fB\-\-header\fR +.sp +Header\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: rootnode option +.\" rootnode option: tokuftdump +\fB\-\-rootnode\fR +.sp +Rootnode\&. 
+.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: node option +.\" node option: tokuftdump +\fB\-\-node \fIN\fR +.sp +Node\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: fragmentation option +.\" fragmentation option: tokuftdump +\fB\-\-fragmentation\fR +.sp +Fragmentation\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: garbage option +.\" garbage option: tokuftdump +\fB\-\-garbage\fR +.sp +Garbage\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: tsv option +.\" tsv option: tokuftdump +\fB\-\-tsv\fR +.sp +TSV\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: translation-table option +.\" translation-table option: tokuftdump +\fB\-\-translation\-table\fR +.sp +Translation table\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +.\" tokuftdump: summary option +.\" summary option: tokuftdump +\fB\-\-summary\fR +.sp +Provide summary info\&. +.RE +.SH "COPYRIGHT" +.br +.PP +Copyright 2016 MariaDB Foundation +.PP +This documentation is free software; you can redistribute it and/or modify it only under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. +.PP +This documentation is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +.PP +You should have received a copy of the GNU General Public License along with the program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or see http://www.gnu.org/licenses/. 
+.sp +.SH "SEE ALSO" +For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/ +.SH AUTHOR +MariaDB Foundation (http://www.mariadb.org/). From ed3998ae7cc286860670bc9a285aeb99c5edcced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 25 Oct 2016 15:46:10 +0200 Subject: [PATCH 81/96] Revert "Add tokuftdump man page" This reverts commit 1daf746e31e38a3ec1cdcb9427153b65f744dcda. Removed temporarily to make sure there are no legal problems. --- man/CMakeLists.txt | 2 +- tokuftdump.1 | 237 --------------------------------------------- 2 files changed, 1 insertion(+), 238 deletions(-) delete mode 100644 tokuftdump.1 diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt index c6163fee537..c4383b31a17 100644 --- a/man/CMakeLists.txt +++ b/man/CMakeLists.txt @@ -22,7 +22,7 @@ SET(MAN1_SERVER innochecksum.1 my_print_defaults.1 myisam_ftdump.1 myisamchk.1 mysql_tzinfo_to_sql.1 mysql_upgrade.1 mysql_zap.1 mysqld_multi.1 mysqld_safe.1 mysqldumpslow.1 mysqlhotcopy.1 mysqltest.1 perror.1 replace.1 resolve_stack_dump.1 - resolveip.1 mysqlbug.1 tokuftdump.1) + resolveip.1 mysqlbug.1) SET(MAN8_SERVER mysqld.8) SET(MAN1_CLIENT msql2mysql.1 mysql.1 mysql_find_rows.1 mysql_waitpid.1 mysqlaccess.1 mysqladmin.1 mysqlbinlog.1 mysqlcheck.1 diff --git a/tokuftdump.1 b/tokuftdump.1 deleted file mode 100644 index 3d9faae30ca..00000000000 --- a/tokuftdump.1 +++ /dev/null @@ -1,237 +0,0 @@ -'\" t -.\" -.TH "\FBTOKUFTDUMP\FR" "1" "04/07/2016" "MariaDB 10\&.0" "MariaDB Database System" -.\" ----------------------------------------------------------------- -.\" * set default formatting -.\" ----------------------------------------------------------------- -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l -.\" ----------------------------------------------------------------- -.\" * MAIN CONTENT STARTS HERE * -.\" 
----------------------------------------------------------------- -.\" tokuftdump -.\" upgrading MySQL -.SH "NAME" -tokuftdump \- look into the fractal tree file -.SH "SYNOPSIS" -.HP \w'\fBtokuftdump\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u -\fBtokuftdump [\fR\fB\fIoptions\fR\fR\fB]\fR -.SH "DESCRIPTION" -.PP -\fBtokuftdump\fR -Investigates and diagnoses the fractal tree\&. -.PP -\fBtokuftdump\fR -supports the following options for processing option files\&. -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: interactive option -.\" interactive option: tokuftdump -\fB\-\-interactive\fR -.sp -Interactive\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: support option -.\" support option: tokuftdump -\fB\-\-support \fI/path/to/fractal-tree/file\fR -.sp -An interactive way to see what messages and/or switch between FTs\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: json option -.\" json option: tokuftdump -\fB\-\-json \fI/path/to/fractal-tree/file [output_json_file]\fR -.sp -If the output json file is left empty, FT\&.json will be created automatically\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: nodata option -.\" nodata option: tokuftdump -\fB\-\-nodata\fR -.sp -Nodata\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: dumpdata option -.\" dumpdata option: tokuftdump -\fB\-\-dumpdata = \fR\fB\fI0|1\fR\fR -.sp -Dumpdata\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: header option -.\" header option: tokuftdump -\fB\-\-header\fR -.sp -Header\&. 
-.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: rootnode option -.\" rootnode option: tokuftdump -\fB\-\-rootnode\fR -.sp -Rootnode\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: node option -.\" node option: tokuftdump -\fB\-\-node \fIN\fR -.sp -Node\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: fragmentation option -.\" fragmentation option: tokuftdump -\fB\-\-fragmentation\fR -.sp -Fragmentation\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: garbage option -.\" garbage option: tokuftdump -\fB\-\-garbage\fR -.sp -Garbage\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: tsv option -.\" tsv option: tokuftdump -\fB\-\-tsv\fR -.sp -TSV\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: translation-table option -.\" translation-table option: tokuftdump -\fB\-\-translation\-table\fR -.sp -Translation table\&. -.RE -.sp -.RS 4 -.ie n \{\ -\h'-04'\(bu\h'+03'\c -.\} -.el \{\ -.sp -1 -.IP \(bu 2.3 -.\} -.\" tokuftdump: summary option -.\" summary option: tokuftdump -\fB\-\-summary\fR -.sp -Provide summary info\&. -.RE -.SH "COPYRIGHT" -.br -.PP -Copyright 2016 MariaDB Foundation -.PP -This documentation is free software; you can redistribute it and/or modify it only under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. -.PP -This documentation is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-.PP -You should have received a copy of the GNU General Public License along with the program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or see http://www.gnu.org/licenses/. -.sp -.SH "SEE ALSO" -For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/ -.SH AUTHOR -MariaDB Foundation (http://www.mariadb.org/). From d7dc03a26797f07625e8c44d2d1ac7f76e860bad Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 25 Oct 2016 17:01:37 +0200 Subject: [PATCH 82/96] 5.6.33-79.0 --- storage/xtradb/buf/buf0buf.cc | 4 +- storage/xtradb/buf/buf0dblwr.cc | 2 +- storage/xtradb/buf/buf0flu.cc | 5 + storage/xtradb/dict/dict0boot.cc | 4 + storage/xtradb/dict/dict0crea.cc | 583 ++++++++++++++++++++++ storage/xtradb/dict/dict0dict.cc | 158 ++++++ storage/xtradb/dict/dict0load.cc | 159 +++++- storage/xtradb/fil/fil0fil.cc | 2 + storage/xtradb/fts/fts0fts.cc | 31 ++ storage/xtradb/handler/ha_innodb.cc | 392 ++++++++++++++- storage/xtradb/handler/ha_innodb.h | 37 ++ storage/xtradb/handler/handler0alter.cc | 66 ++- storage/xtradb/handler/i_s.cc | 2 + storage/xtradb/handler/xtradb_i_s.cc | 350 +++++++++++++ storage/xtradb/handler/xtradb_i_s.h | 2 + storage/xtradb/include/data0type.h | 14 + storage/xtradb/include/data0type.ic | 16 + storage/xtradb/include/dict0boot.h | 32 ++ storage/xtradb/include/dict0crea.h | 91 ++++ storage/xtradb/include/dict0dict.h | 46 ++ storage/xtradb/include/dict0load.h | 29 ++ storage/xtradb/include/fts0fts.h | 10 + storage/xtradb/include/os0thread.h | 15 +- storage/xtradb/include/rem0types.h | 3 + storage/xtradb/include/row0mysql.h | 85 +++- storage/xtradb/include/srv0srv.h | 5 + storage/xtradb/include/univ.i | 2 +- storage/xtradb/log/log0log.cc | 20 +- storage/xtradb/log/log0online.cc | 33 +- storage/xtradb/mach/mach0data.cc | 13 +- storage/xtradb/os/os0thread.cc | 24 +- storage/xtradb/rem/rem0rec.cc | 23 +- 
storage/xtradb/row/row0ftsort.cc | 2 +- storage/xtradb/row/row0log.cc | 14 +- storage/xtradb/row/row0merge.cc | 18 +- storage/xtradb/row/row0mysql.cc | 634 +++++++++++++++++++++++- storage/xtradb/row/row0sel.cc | 45 +- storage/xtradb/srv/srv0start.cc | 6 + 38 files changed, 2892 insertions(+), 85 deletions(-) diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 321a1d9f673..978d94f07ec 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -4492,7 +4492,9 @@ corrupt: recv_recover_page(TRUE, (buf_block_t*) bpage); } - if (uncompressed && !recv_no_ibuf_operations) { + if (uncompressed && !recv_no_ibuf_operations + && fil_page_get_type(frame) == FIL_PAGE_INDEX + && page_is_leaf(frame)) { buf_block_t* block; ibool update_ibuf_bitmap; diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index f4d1c637e3e..3c12d6da73f 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -521,7 +521,7 @@ buf_dblwr_process() if (buf_page_is_corrupted(true, read_buf, zip_size)) { fprintf(stderr, - "InnoDB: Warning: database page" + "InnoDB: Database page" " corruption or a failed\n" "InnoDB: file read of" " space %lu page %lu.\n" diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 14a5fbde7e8..5dd2efcf0c3 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -2568,6 +2568,11 @@ page_cleaner_sleep_if_needed( ulint next_loop_time) /*!< in: time when next loop iteration should start */ { + /* No sleep if we are cleaning the buffer pool during the shutdown + with everything else finished */ + if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE) + return; + ulint cur_time = ut_time_ms(); if (next_loop_time > cur_time) { diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc index 94a3af2852b..c0bb0298bea 100644 --- a/storage/xtradb/dict/dict0boot.cc +++ b/storage/xtradb/dict/dict0boot.cc @@ -272,6 
+272,10 @@ dict_boot(void) ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2); ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4); ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6); + ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT == 3); + ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT == 5); + ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT_COLS == 3); + ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS == 5); mtr_start(&mtr); diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc index a4fcf57c028..9460ce51659 100644 --- a/storage/xtradb/dict/dict0crea.cc +++ b/storage/xtradb/dict/dict0crea.cc @@ -38,6 +38,7 @@ Created 1/8/1996 Heikki Tuuri #include "que0que.h" #include "row0ins.h" #include "row0mysql.h" +#include "row0sel.h" #include "pars0pars.h" #include "trx0roll.h" #include "usr0sess.h" @@ -1790,6 +1791,135 @@ dict_create_or_check_sys_tablespace(void) return(err); } +/** Creates the zip_dict system table inside InnoDB +at server bootstrap or server start if it is not found or is +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_sys_zip_dict(void) +{ + trx_t* trx; + my_bool srv_file_per_table_backup; + dberr_t err; + dberr_t sys_zip_dict_err; + dberr_t sys_zip_dict_cols_err; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + /* Note: The master thread has not been started at this point. */ + + sys_zip_dict_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2); + sys_zip_dict_cols_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT_COLS", DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, + 1); + + if (sys_zip_dict_err == DB_SUCCESS && + sys_zip_dict_cols_err == DB_SUCCESS) + return (DB_SUCCESS); + + trx = trx_allocate_for_mysql(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + trx->op_info = "creating zip_dict and zip_dict_cols sys tables"; + + row_mysql_lock_data_dictionary(trx); + + /* Check which incomplete table definition to drop. 
*/ + + if (sys_zip_dict_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_ZIP_DICT table."); + row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE); + } + if (sys_zip_dict_cols_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_ZIP_DICT_COLS table."); + row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Creating zip_dict and zip_dict_cols system tables."); + + /* We always want SYSTEM tables to be created inside the system + tablespace. */ + srv_file_per_table_backup = srv_file_per_table; + srv_file_per_table = 0; + + err = que_eval_sql( + NULL, + "PROCEDURE CREATE_SYS_ZIP_DICT_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE SYS_ZIP_DICT(\n" + " ID INT UNSIGNED NOT NULL,\n" + " NAME CHAR(" + STRINGIFY_ARG(ZIP_DICT_MAX_NAME_LENGTH) + ") NOT NULL,\n" + " DATA BLOB NOT NULL\n" + ");\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_ID" + " ON SYS_ZIP_DICT (ID);\n" + "CREATE UNIQUE INDEX SYS_ZIP_DICT_NAME" + " ON SYS_ZIP_DICT (NAME);\n" + "CREATE TABLE SYS_ZIP_DICT_COLS(\n" + " TABLE_ID INT UNSIGNED NOT NULL,\n" + " COLUMN_POS INT UNSIGNED NOT NULL,\n" + " DICT_ID INT UNSIGNED NOT NULL\n" + ");\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_COLS_COMPOSITE" + " ON SYS_ZIP_DICT_COLS (TABLE_ID, COLUMN_POS);\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creation of SYS_ZIP_DICT and SYS_ZIP_DICT_COLS" + "has failed with error %lu. Tablespace is full. 
" + "Dropping incompletely created tables.", + (ulong) err); + + ut_a(err == DB_OUT_OF_FILE_SPACE + || err == DB_TOO_MANY_CONCURRENT_TRXS); + + row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE); + row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE); + + if (err == DB_OUT_OF_FILE_SPACE) { + err = DB_MUST_GET_MORE_FILE_SPACE; + } + } + + trx_commit_for_mysql(trx); + + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); + + srv_file_per_table = srv_file_per_table_backup; + + if (err == DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_INFO, + "zip_dict and zip_dict_cols system tables created."); + } + + /* Note: The master thread has not been started at this point. */ + /* Confirm and move to the non-LRU part of the table LRU list. */ + + sys_zip_dict_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2); + ut_a(sys_zip_dict_err == DB_SUCCESS); + sys_zip_dict_cols_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT_COLS", + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, 1); + ut_a(sys_zip_dict_cols_err == DB_SUCCESS); + + return(err); +} + /********************************************************************//** Add a single tablespace definition to the data dictionary tables in the database. @@ -1843,3 +1973,456 @@ dict_create_add_tablespace_to_dictionary( return(error); } + +/** Add a single compression dictionary definition to the SYS_ZIP_DICT +InnoDB system table. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + ut_ad(data); + + pars_info_t* info = pars_info_create(); + + pars_info_add_literal(info, "name", name, name_len, + DATA_VARCHAR, DATA_ENGLISH); + pars_info_add_literal(info, "data", data, data_len, + DATA_BLOB, DATA_BINARY_TYPE | DATA_NOT_NULL); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + " max_id INT;\n" + "DECLARE CURSOR cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " ORDER BY ID DESC;\n" + "BEGIN\n" + " max_id := 0;\n" + " OPEN cur;\n" + " FETCH cur INTO max_id;\n" + " IF (cur % NOTFOUND) THEN\n" + " max_id := 0;\n" + " END IF;\n" + " CLOSE cur;\n" + " INSERT INTO SYS_ZIP_DICT VALUES" + " (max_id + 1, :name, :data);\n" + "END;\n", + FALSE, trx); + + return error; +} + +/** Fetch callback, just stores extracted zip_dict id in the external +variable. +@return TRUE if all OK */ +static +ibool +dict_create_extract_int_aux( + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: int32 id */ +{ + sel_node_t* node = static_cast(row); + dfield_t* dfield = que_node_get_val(node->select_list); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == sizeof(ib_uint32_t)); + + memcpy(user_arg, dfield_get_data(dfield), sizeof(ib_uint32_t)); + + return(TRUE); +} + +/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS +InnoDB system table. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "table_id", table_id); + pars_info_add_int4_literal(info, "column_pos", column_pos); + pars_info_add_int4_literal(info, "dict_id", dict_id); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + " INSERT INTO SYS_ZIP_DICT_COLS VALUES" + " (:table_id, :column_pos, :dict_id);\n" + "END;\n", + FALSE, trx); + return error; +} + +/** Get a single compression dictionary id for the given +(table id, column pos) pair. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(dict_id); + + pars_info_t* info = pars_info_create(); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf ), + ULINT32_UNDEFINED); + + pars_info_add_int4_literal(info, "table_id", table_id); + pars_info_add_int4_literal(info, "column_pos", column_pos); + pars_info_bind_function( + info, "my_func", dict_create_extract_int_aux, &dict_id_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT DICT_ID FROM SYS_ZIP_DICT_COLS\n" + " WHERE TABLE_ID = :table_id AND\n" + " COLUMN_POS = :column_pos;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) + error = DB_RECORD_NOT_FOUND; + else + *dict_id = local_dict_id; + } 
+ return error; +} + +/** Get compression dictionary id for the given name. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_name( + const char* dict_name, /*!< in: dict name */ + ulint dict_name_len, /*!< in: dict name length */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(dict_name); + ut_ad(dict_name_len); + ut_ad(dict_id); + + pars_info_t* info = pars_info_create(); + + pars_info_add_literal(info, "dict_name", dict_name, dict_name_len, + DATA_VARCHAR, DATA_ENGLISH); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf), + ULINT32_UNDEFINED); + pars_info_bind_function( + info, "my_func", dict_create_extract_int_aux, &dict_id_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " WHERE NAME = :dict_name;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) + error = DB_RECORD_NOT_FOUND; + else + *dict_id = local_dict_id; + } + return error; +} + +/** Auxiliary enum used to indicate zip dict data extraction result code */ +enum zip_dict_info_aux_code { + zip_dict_info_success, /*!< success */ + zip_dict_info_not_found, /*!< zip dict record not found */ + zip_dict_info_oom, /*!< out of memory */ + zip_dict_info_corrupted_name, /*!< corrupted zip dict name */ + zip_dict_info_corrupted_data /*!< corrupted zip dict data */ +}; + +/** Auxiliary struct used to return zip dict info aling with result code */ +struct zip_dict_info_aux { + LEX_STRING name; /*!< zip dict name */ + LEX_STRING data; /*!< zip dict data */ + int code; /*!< result code (0 - success) */ +}; + +/** Fetch callback, just stores extracted zip_dict data in the external 
+variable. +@return always returns TRUE */ +static +ibool +dict_create_get_zip_dict_info_by_id_aux( + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to zip_dict_info_aux* */ +{ + sel_node_t* node = static_cast(row); + zip_dict_info_aux* result = + static_cast(user_arg); + + result->code = zip_dict_info_success; + result->name.str = 0; + result->name.length = 0; + result->data.str = 0; + result->data.length = 0; + + /* NAME field */ + que_node_t* exp = node->select_list; + ut_a(exp != 0); + + dfield_t* dfield = que_node_get_val(exp); + dtype_t* type = dfield_get_type(dfield); + ut_a(dtype_get_mtype(type) == DATA_VARCHAR); + + ulint len = dfield_get_len(dfield); + void* data = dfield_get_data(dfield); + + + if (len == UNIV_SQL_NULL) { + result->code = zip_dict_info_corrupted_name; + } + else { + result->name.str = + static_cast(my_malloc(len + 1, MYF(0))); + if (result->name.str == 0) { + result->code = zip_dict_info_oom; + } + else { + memcpy(result->name.str, data, len); + result->name.str[len] = '\0'; + result->name.length = len; + } + } + + /* DATA field */ + exp = que_node_get_next(exp); + ut_a(exp != 0); + + dfield = que_node_get_val(exp); + type = dfield_get_type(dfield); + ut_a(dtype_get_mtype(type) == DATA_BLOB); + + len = dfield_get_len(dfield); + data = dfield_get_data(dfield); + + if (len == UNIV_SQL_NULL) { + result->code = zip_dict_info_corrupted_data; + } + else { + result->data.str = + static_cast(my_malloc( + len == 0 ? 
1 : len, MYF(0))); + if (result->data.str == 0) { + result->code = zip_dict_info_oom; + } + else { + memcpy(result->data.str, data, len); + result->data.length = len; + } + } + + ut_ad(que_node_get_next(exp) == 0); + + if (result->code != zip_dict_info_success) { + if (result->name.str == 0) { + mem_free(result->name.str); + result->name.str = 0; + result->name.length = 0; + } + if (result->data.str == 0) { + mem_free(result->data.str); + result->data.str = 0; + result->data.length = 0; + } + } + + return TRUE; +} + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory for name and data on success. +Must be freed with mem_free(). +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_info_by_id( + ulint dict_id, /*!< in: dict id */ + char** name, /*!< out: dict name */ + ulint* name_len, /*!< out: dict name length*/ + char** data, /*!< out: dict data */ + ulint* data_len, /*!< out: dict data length*/ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + ut_ad(data); + + zip_dict_info_aux rec; + rec.code = zip_dict_info_not_found; + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "id", dict_id); + pars_info_bind_function( + info, "my_func", dict_create_get_zip_dict_info_by_id_aux, + &rec); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT NAME, DATA FROM SYS_ZIP_DICT\n" + " WHERE ID = :id;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + switch (rec.code) { + case zip_dict_info_success: + *name = rec.name.str; + *name_len = rec.name.length; + *data = rec.data.str; + *data_len = rec.data.length; + break; + case zip_dict_info_not_found: + error = DB_RECORD_NOT_FOUND; + break; + case zip_dict_info_oom: + error = DB_OUT_OF_MEMORY; + break; + case zip_dict_info_corrupted_name: + case 
zip_dict_info_corrupted_data: + error = DB_INVALID_NULL; + break; + default: + ut_error; + } + } + return error; +} + +/** Remove a single compression dictionary from the data dictionary +tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + + pars_info_t* info = pars_info_create(); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf), + ULINT32_UNDEFINED); + ib_uint32_t counter_buf; + mach_write_to_4(reinterpret_cast(&counter_buf), + ULINT32_UNDEFINED); + + pars_info_add_literal(info, "name", name, name_len, + DATA_VARCHAR, DATA_ENGLISH); + pars_info_bind_int4_literal(info, "dict_id", &dict_id_buf); + pars_info_bind_function(info, "find_dict_func", + dict_create_extract_int_aux, &dict_id_buf); + pars_info_bind_function(info, "count_func", + dict_create_extract_int_aux, &counter_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION find_dict_func;\n" + "DECLARE FUNCTION count_func;\n" + "DECLARE CURSOR dict_cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " WHERE NAME = :name\n" + " FOR UPDATE;\n" + "DECLARE CURSOR ref_cur IS\n" + " SELECT 1 FROM SYS_ZIP_DICT_COLS\n" + " WHERE DICT_ID = :dict_id;\n" + "BEGIN\n" + " OPEN dict_cur;\n" + " FETCH dict_cur INTO find_dict_func();\n" + " IF NOT (SQL % NOTFOUND) THEN\n" + " OPEN ref_cur;\n" + " FETCH ref_cur INTO count_func();\n" + " IF SQL % NOTFOUND THEN\n" + " DELETE FROM SYS_ZIP_DICT WHERE CURRENT OF dict_cur;\n" + " END IF;\n" + " CLOSE ref_cur;\n" + " END IF;\n" + " CLOSE dict_cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) { + error = DB_RECORD_NOT_FOUND; + } + else { + ib_uint32_t local_counter = mach_read_from_4( 
+ reinterpret_cast(&counter_buf)); + if (local_counter != ULINT32_UNDEFINED) + error = DB_ROW_IS_REFERENCED; + } + } + return error; +} + +/** Remove all compression dictionary references for the given table ID from +the data dictionary tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict_references_for_table( + ulint table_id, /*!< in: table id */ + trx_t* trx) /*!< in/out: transaction */ +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "table_id", table_id); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + " DELETE FROM SYS_ZIP_DICT_COLS\n" + " WHERE TABLE_ID = :table_id;\n" + "END;\n", + FALSE, trx); + return error; +} diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index f1fbf25c3a6..57dd6cfa04d 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -6781,3 +6781,161 @@ dict_tf_to_row_format_string( return(0); } #endif /* !UNIV_HOTBACKUP */ + +/** Insert a records into SYS_ZIP_DICT. 
+@retval DB_SUCCESS if OK +@retval dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_create_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len, /*!< in: zip_dict name length*/ + const char* data, /*!< in: zip_dict data */ + ulint data_len) /*!< in: zip_dict data length */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + ut_ad(name); + ut_ad(data); + + rw_lock_x_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "insert zip_dict"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_add_zip_dict(name, name_len, data, data_len, trx); + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + } + else { + trx->op_info = "rollback of internal trx on zip_dict table"; + trx_rollback_to_savepoint(trx, NULL); + ut_a(trx->error_state == DB_SUCCESS); + } + trx->op_info = ""; + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_x_unlock(&dict_operation_lock); + + return err; +} +/** Get single compression dictionary id for the given +(table id, column pos) pair. 
+@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_id_by_key( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id) /*!< out: zip_dict id */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + rw_lock_s_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "get zip dict id by composite key"; + trx->dict_operation_lock_mode = RW_S_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_get_zip_dict_id_by_reference(table_id, column_pos, + dict_id, trx); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_s_unlock(&dict_operation_lock); + + return err; +} +/** Get compression dictionary info (name and data) for the given id. +Allocates memory in name->str and data->str on success. +Must be freed with mem_free(). +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_info_by_id( + ulint dict_id, /*!< in: table name */ + char** name, /*!< out: dictionary name */ + ulint* name_len, /*!< out: dictionary name length*/ + char** data, /*!< out: dictionary data */ + ulint* data_len) /*!< out: dictionary data length*/ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + rw_lock_s_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "get zip dict name and data by id"; + trx->dict_operation_lock_mode = RW_S_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_get_zip_dict_info_by_id(dict_id, name, name_len, + data, data_len, trx); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_s_unlock(&dict_operation_lock); + + return err; +} +/** Delete a record in SYS_ZIP_DICT with the given name. 
+@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found +@retval DB_ROW_IS_REFERENCED if in use */ +UNIV_INTERN +dberr_t +dict_drop_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len) /*!< in: zip_dict name length*/ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + ut_ad(name); + + rw_lock_x_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "delete zip_dict"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_remove_zip_dict(name, name_len, trx); + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + } + else { + trx->op_info = "rollback of internal trx on zip_dict table"; + trx_rollback_to_savepoint(trx, NULL); + ut_a(trx->error_state == DB_SUCCESS); + } + trx->op_info = ""; + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_x_unlock(&dict_operation_lock); + + return err; +} diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc index 988351dbca5..db2aa3239f5 100644 --- a/storage/xtradb/dict/dict0load.cc +++ b/storage/xtradb/dict/dict0load.cc @@ -56,7 +56,9 @@ static const char* SYSTEM_TABLE_NAME[] = { "SYS_FOREIGN", "SYS_FOREIGN_COLS", "SYS_TABLESPACES", - "SYS_DATAFILES" + "SYS_DATAFILES", + "SYS_ZIP_DICT", + "SYS_ZIP_DICT_COLS" }; /* If this flag is TRUE, then we will load the cluster index's (and tables') @@ -728,6 +730,161 @@ err_len: return(NULL); } +/** This function parses a SYS_ZIP_DICT record, extracts necessary +information from the record and returns to caller. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict( + mem_heap_t* heap, /*!< in/out: heap memory */ + ulint zip_size, /*!< in: nonzero=compressed BLOB page size */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* id, /*!< out: dict id */ + const char** name, /*!< out: dict name */ + const char** data, /*!< out: dict data */ + ulint* data_len) /*!< out: dict data length */ +{ + ulint len; + const byte* field; + + /* Initialize the output values */ + *id = ULINT_UNDEFINED; + *name = NULL; + *data = NULL; + *data_len = 0; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_ZIP_DICT"); + } + + if (UNIV_UNLIKELY( + rec_get_n_fields_old(rec)!= DICT_NUM_FIELDS__SYS_ZIP_DICT)) { + return("wrong number of columns in SYS_ZIP_DICT record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *id = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR, &len); + if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__NAME, &len); + if (UNIV_UNLIKELY(len == 0 || len == UNIV_SQL_NULL)) { + goto err_len; + } + *name = mem_heap_strdupl(heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__DATA, &len); + if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { + goto err_len; + } + + if (rec_get_1byte_offs_flag(rec) == 0 && + rec_2_is_field_extern(rec, DICT_FLD__SYS_ZIP_DICT__DATA)) { + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(field + len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, + 
BTR_EXTERN_FIELD_REF_SIZE))) { + goto err_len; + } + *data = reinterpret_cast( + btr_copy_externally_stored_field(data_len, field, + zip_size, len, heap)); + } + else { + *data_len = len; + *data = static_cast(mem_heap_dup(heap, field, len)); + } + + return(NULL); + +err_len: + return("incorrect column length in SYS_ZIP_DICT"); +} + +/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict_cols( + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* table_id, /*!< out: table id */ + ulint* column_pos, /*!< out: column position */ + ulint* dict_id) /*!< out: dict id */ +{ + ulint len; + const byte* field; + + /* Initialize the output values */ + *table_id = ULINT_UNDEFINED; + *column_pos = ULINT_UNDEFINED; + *dict_id = ULINT_UNDEFINED; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_ZIP_DICT_COLS"); + } + + if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) != + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS)) { + return("wrong number of columns in SYS_ZIP_DICT_COLS" + " record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { +err_len: + return("incorrect column length in SYS_ZIP_DICT_COLS"); + } + *table_id = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *column_pos = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR, &len); + if (UNIV_UNLIKELY(len != 
DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *dict_id = mach_read_from_4(field); + + return(NULL); +} /********************************************************************//** Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS. @return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */ diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index c1dbb5f91b9..57e415ae939 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -489,6 +489,8 @@ fil_space_get_by_id( ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), space->id == id); + /* The system tablespace must always be found */ + ut_ad(space || id != 0 || srv_is_being_started); return(space); } diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 25059db96b0..a0f0fab5566 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. 
*/ UNIV_INTERN char* fts_internal_tbl_name = NULL; +UNIV_INTERN char* fts_internal_tbl_name2 = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -6569,6 +6570,36 @@ fts_check_corrupt_index( return(0); } +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len) +{ + fts_aux_table_t aux_table; + char* parent_table_name = NULL; + + if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { + dict_table_t* parent_table; + + parent_table = dict_table_open_on_id( + aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (parent_table != NULL) { + parent_table_name = mem_strdupl( + parent_table->name, + strlen(parent_table->name)); + + dict_table_close(parent_table, TRUE, FALSE); + } + } + + return(parent_table_name); +} + /** Check the validity of the parent table. @param[in] aux_table auxiliary table @return true if it is a valid table or false if it is not */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index f00d11bd870..58d638d0b0c 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1361,6 +1361,29 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ +/** Creates a new compression dictionary. */ +static +handler_create_zip_dict_result +innobase_create_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len, + /*!< in/out: zip dictionary name length */ + const char* data, /*!< in: zip dictionary data */ + ulint* data_len); + /*!< in/out: zip dictionary data length */ + +/** Drops a existing compression dictionary. 
*/ +static +handler_drop_zip_dict_result +innobase_drop_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len); + /*!< in/out: zip dictionary name length */ + /*************************************************************//** Checks if buffer pool is big enough to enable backoff algorithm. InnoDB empty free list algorithm backoff requires free pages @@ -3422,6 +3445,9 @@ innobase_init( innobase_hton->kill_connection = innobase_kill_connection; + innobase_hton->create_zip_dict = innobase_create_zip_dict; + innobase_hton->drop_zip_dict = innobase_drop_zip_dict; + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -4100,6 +4126,89 @@ innobase_purge_changed_page_bitmaps( return (my_bool)log_online_purge_changed_page_bitmaps(lsn); } +/** Creates a new compression dictionary. */ +static +handler_create_zip_dict_result +innobase_create_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len, + /*!< in/out: zip dictionary name length */ + const char* data, /*!< in: zip dictionary data */ + ulint* data_len) + /*!< in/out: zip dictionary data length */ +{ + handler_create_zip_dict_result result = + HA_CREATE_ZIP_DICT_UNKNOWN_ERROR; + + DBUG_ENTER("innobase_create_zip_dict"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (UNIV_UNLIKELY(high_level_read_only)) { + DBUG_RETURN(HA_CREATE_ZIP_DICT_READ_ONLY); + } + + if (UNIV_UNLIKELY(*name_len > ZIP_DICT_MAX_NAME_LENGTH)) { + *name_len = ZIP_DICT_MAX_NAME_LENGTH; + DBUG_RETURN(HA_CREATE_ZIP_DICT_NAME_TOO_LONG); + } + + if (UNIV_UNLIKELY(*data_len > ZIP_DICT_MAX_DATA_LENGTH)) { + *data_len = ZIP_DICT_MAX_DATA_LENGTH; + DBUG_RETURN(HA_CREATE_ZIP_DICT_DATA_TOO_LONG); + } + + switch (dict_create_zip_dict(name, *name_len, data, *data_len)) { + case 
DB_SUCCESS: + result = HA_CREATE_ZIP_DICT_OK; + break; + case DB_DUPLICATE_KEY: + result = HA_CREATE_ZIP_DICT_ALREADY_EXISTS; + break; + default: + ut_ad(0); + result = HA_CREATE_ZIP_DICT_UNKNOWN_ERROR; + } + DBUG_RETURN(result); +} + +/** Drops a existing compression dictionary. */ +static +handler_drop_zip_dict_result +innobase_drop_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len) + /*!< in/out: zip dictionary name length */ +{ + handler_drop_zip_dict_result result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR; + + DBUG_ENTER("innobase_drop_zip_dict"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (UNIV_UNLIKELY(high_level_read_only)) { + DBUG_RETURN(HA_DROP_ZIP_DICT_READ_ONLY); + } + + switch (dict_drop_zip_dict(name, *name_len)) { + case DB_SUCCESS: + result = HA_DROP_ZIP_DICT_OK; + break; + case DB_RECORD_NOT_FOUND: + result = HA_DROP_ZIP_DICT_DOES_NOT_EXIST; + break; + case DB_ROW_IS_REFERENCED: + result = HA_DROP_ZIP_DICT_IS_REFERENCED; + break; + default: + ut_ad(0); + result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR; + } + DBUG_RETURN(result); +} + /*****************************************************************//** Check whether this is a fake change transaction. @return TRUE if a fake change transaction */ @@ -5460,6 +5569,86 @@ func_exit: DBUG_RETURN(ret); } +/** This function checks if all the compression dictionaries referenced +in table->fields exist in SYS_ZIP_DICT InnoDB system table. +@return true if all referenced dictionaries exist */ +UNIV_INTERN +bool +innobase_check_zip_dicts( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + ulint* dict_ids, /*!< out: identified zip dict ids + (at least n_fields long) */ + trx_t* trx, /*!< in: transaction */ + const char** err_dict_name) /*!< out: the name of the + zip_dict which does not exist. 
*/ +{ + DBUG_ENTER("innobase_check_zip_dicts"); + + bool res = true; + dberr_t err = DB_SUCCESS; + const size_t n_fields = table->s->fields; + + Field* field_ptr; + for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields; + ++field_idx) + { + field_ptr = table->field[field_idx]; + if (field_ptr->has_associated_compression_dictionary()) { + err = dict_create_get_zip_dict_id_by_name( + field_ptr->zip_dict_name.str, + field_ptr->zip_dict_name.length, + &dict_ids[field_idx], + trx); + ut_a(err == DB_SUCCESS || err == DB_RECORD_NOT_FOUND); + } + else { + dict_ids[field_idx] = ULINT_UNDEFINED; + } + + } + + if (err != DB_SUCCESS) { + res = false; + *err_dict_name = field_ptr->zip_dict_name.str; + } + + DBUG_RETURN(res); +} + +/** This function creates compression dictionary references in +SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info +in table->fields and provided zip dict ids. */ +UNIV_INTERN +void +innobase_create_zip_dict_references( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + table_id_t ib_table_id, /*!< in: table ID in Innodb data + dictionary */ + ulint* zip_dict_ids, /*!< in: zip dict ids + (at least n_fields long) */ + trx_t* trx) /*!< in: transaction */ +{ + DBUG_ENTER("innobase_create_zip_dict_references"); + + dberr_t err = DB_SUCCESS; + const size_t n_fields = table->s->fields; + + for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields; + ++field_idx) + { + if (zip_dict_ids[field_idx] != ULINT_UNDEFINED) { + err = dict_create_add_zip_dict_reference(ib_table_id, + table->field[field_idx]->field_index, + zip_dict_ids[field_idx], trx); + ut_a(err == DB_SUCCESS); + } + } + + DBUG_VOID_RETURN; +} + /*******************************************************************//** This function uses index translation table to quickly locate the requested index structure. 
@@ -6749,7 +6938,12 @@ ha_innobase::store_key_val_for_row( blob_data = row_mysql_read_blob_ref(&blob_len, (byte*) (record + (ulint) get_field_offset(table, field)), - (ulint) field->pack_length()); + (ulint) field->pack_length(), + field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast( + field->zip_dict_data.str), + field->zip_dict_data.length, prebuilt); true_len = blob_len; @@ -7004,6 +7198,9 @@ build_template_field( templ->mbminlen = dict_col_get_mbminlen(col); templ->mbmaxlen = dict_col_get_mbmaxlen(col); templ->is_unsigned = col->prtype & DATA_UNSIGNED; + templ->compressed = (field->column_format() + == COLUMN_FORMAT_TYPE_COMPRESSED); + templ->zip_dict_data = field->zip_dict_data; if (!dict_index_is_clust(index) && templ->rec_field_no == ULINT_UNDEFINED) { @@ -7761,8 +7958,11 @@ calc_row_difference( switch (col_type) { case DATA_BLOB: - o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); - n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); + /* Do not compress blob column while comparing*/ + o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len, + false, 0, 0, prebuilt); + n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len, + false, 0, 0, prebuilt); break; @@ -7832,7 +8032,13 @@ calc_row_difference( TRUE, new_mysql_row_col, col_pack_len, - dict_table_is_comp(prebuilt->table)); + dict_table_is_comp(prebuilt->table), + field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast( + field->zip_dict_data.str), + field->zip_dict_data.length, + prebuilt); dfield_copy(&ufield->new_val, &dfield); } else { dfield_set_null(&ufield->new_val); @@ -9503,6 +9709,7 @@ create_table_def( ulint unsigned_type; ulint binary_type; ulint long_true_varchar; + ulint compressed; ulint charset_no; ulint i; ulint doc_id_col = 0; @@ -9649,6 +9856,13 @@ create_table_def( } } + /* Check if the the field has COMPRESSED attribute */ + compressed = 0; + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + compressed = 
DATA_COMPRESSED; + } + /* First check whether the column to be added has a system reserved name. */ if (dict_col_name_is_reserved(field->field_name)){ @@ -9669,7 +9883,8 @@ err_col: dtype_form_prtype( (ulint) field->type() | nulls_allowed | unsigned_type - | binary_type | long_true_varchar, + | binary_type | long_true_varchar + | compressed, charset_no), col_len); } @@ -10505,6 +10720,10 @@ ha_innobase::create( const char* stmt; size_t stmt_len; + mem_heap_t* heap = 0; + ulint* zip_dict_ids = 0; + const char* err_zip_dict_name = 0; + DBUG_ENTER("ha_innobase::create"); DBUG_ASSERT(thd != NULL); @@ -10595,6 +10814,18 @@ ha_innobase::create( row_mysql_lock_data_dictionary(trx); + heap = mem_heap_create(form->s->fields * sizeof(ulint)); + zip_dict_ids = static_cast( + mem_heap_alloc(heap, form->s->fields * sizeof(ulint))); + + if (!innobase_check_zip_dicts(form, zip_dict_ids, + trx, &err_zip_dict_name)) { + error = -1; + my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST, + MYF(0), err_zip_dict_name); + goto cleanup; + } + error = create_table_def(trx, form, norm_name, temp_path, remote_path, flags, flags2); if (error) { @@ -10702,6 +10933,22 @@ ha_innobase::create( dict_table_get_all_fts_indexes(innobase_table, fts->indexes); } + /* + Adding compression dictionary <-> compressed table column links + to the SYS_ZIP_DICT_COLS table. 
+ */ + ut_a(zip_dict_ids != 0); + { + dict_table_t* local_table = dict_table_open_on_name( + norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); + + ut_a(local_table); + table_id_t table_id = local_table->id; + dict_table_close(local_table, TRUE, FALSE); + innobase_create_zip_dict_references(form, + table_id, zip_dict_ids, trx); + } + stmt = innobase_get_stmt(thd, &stmt_len); if (stmt) { @@ -10818,6 +11065,9 @@ ha_innobase::create( trx_free_for_mysql(trx); + if (heap != 0) + mem_heap_free(heap); + DBUG_RETURN(0); cleanup: @@ -10827,6 +11077,9 @@ cleanup: trx_free_for_mysql(trx); + if (heap != 0) + mem_heap_free(heap); + DBUG_RETURN(error); } @@ -11904,6 +12157,14 @@ ha_innobase::info_low( if (dict_stats_is_persistent_enabled(ib_table)) { if (is_analyze) { + + /* If this table is already queued for + background analyze, remove it from the + queue as we are about to do the same */ + dict_mutex_enter_for_mysql(); + dict_stats_recalc_pool_del(ib_table); + dict_mutex_exit_for_mysql(); + opt = DICT_STATS_RECALC_PERSISTENT; } else { /* This is e.g. 
'SHOW INDEXES', fetch @@ -13050,6 +13311,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + + if (prebuilt->compress_heap) { + row_mysql_prebuilt_free_compress_heap(prebuilt); + } + break; case HA_EXTRA_RESET_STATE: reset_template(); @@ -13101,6 +13367,10 @@ ha_innobase::reset() row_mysql_prebuilt_free_blob_heap(prebuilt); } + if (prebuilt->compress_heap) { + row_mysql_prebuilt_free_compress_heap(prebuilt); + } + reset_template(); ds_mrr.reset(); @@ -13300,7 +13570,11 @@ ha_innobase::external_lock( && lock_type == F_WRLCK) || thd_sql_command(thd) == SQLCOM_CREATE_INDEX || thd_sql_command(thd) == SQLCOM_DROP_INDEX - || thd_sql_command(thd) == SQLCOM_DELETE)) { + || thd_sql_command(thd) == SQLCOM_DELETE + || thd_sql_command(thd) == + SQLCOM_CREATE_COMPRESSION_DICTIONARY + || thd_sql_command(thd) == + SQLCOM_DROP_COMPRESSION_DICTIONARY)) { if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) { @@ -14062,7 +14336,9 @@ ha_innobase::store_lock( && lock_type <= TL_WRITE)) || sql_command == SQLCOM_CREATE_INDEX || sql_command == SQLCOM_DROP_INDEX - || sql_command == SQLCOM_DELETE)) { + || sql_command == SQLCOM_DELETE + || sql_command == SQLCOM_CREATE_COMPRESSION_DICTIONARY + || sql_command == SQLCOM_DROP_COMPRESSION_DICTIONARY)) { ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); @@ -15001,6 +15277,82 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_YES); } +/** This function reads zip dict-related info from SYS_ZIP_DICT +and SYS_ZIP_DICT_COLS for all columns marked with +COLUMN_FORMAT_TYPE_COMPRESSED flag and updates +zip_dict_name / zip_dict_data for those which have associated +compression dictionaries. 
+*/ +UNIV_INTERN +void +ha_innobase::update_field_defs_with_zip_dict_info() +{ + DBUG_ENTER("update_field_defs_with_zip_dict_info"); + ut_ad(!mutex_own(&dict_sys->mutex)); + + char norm_name[FN_REFLEN]; + normalize_table_name(norm_name, table_share->normalized_path.str); + + dict_table_t* ib_table = dict_table_open_on_name( + norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); + + /* if dict_table_open_on_name() returns NULL, then it means that + TABLE_SHARE is populated for a table being created and we can + skip filling zip dict info here */ + if (ib_table == 0) + DBUG_VOID_RETURN; + + table_id_t ib_table_id = ib_table->id; + dict_table_close(ib_table, FALSE, FALSE); + Field* field; + for (uint i = 0; i < table_share->fields; ++i) { + field = table_share->field[i]; + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + bool reference_found = false; + ulint dict_id = 0; + switch (dict_get_dictionary_id_by_key(ib_table_id, i, + &dict_id)) { + case DB_SUCCESS: + reference_found = true; + break; + case DB_RECORD_NOT_FOUND: + reference_found = false; + break; + default: + ut_error; + } + if (reference_found) { + char* local_name = 0; + ulint local_name_len = 0; + char* local_data = 0; + ulint local_data_len = 0; + if (dict_get_dictionary_info_by_id(dict_id, + &local_name, &local_name_len, + &local_data, &local_data_len) != + DB_SUCCESS) { + ut_error; + } + else { + field->zip_dict_name.str = + local_name; + field->zip_dict_name.length = + local_name_len; + field->zip_dict_data.str = + local_data; + field->zip_dict_data.length = + local_data_len; + } + } + else { + field->zip_dict_name = null_lex_cstr; + field->zip_dict_data = null_lex_cstr; + } + } + } + DBUG_VOID_RETURN; +} + /****************************************************************//** Update the system variable innodb_io_capacity_max using the "saved" value. This function is registered as a callback with MySQL. 
*/ @@ -15555,7 +15907,12 @@ innodb_internal_table_update( my_free(old); } - fts_internal_tbl_name = *(char**) var_ptr; + fts_internal_tbl_name2 = *(char**) var_ptr; + if (fts_internal_tbl_name2 == NULL) { + fts_internal_tbl_name = const_cast("default"); + } else { + fts_internal_tbl_name = fts_internal_tbl_name2; + } } /****************************************************************//** @@ -17888,7 +18245,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, "Whether to disable OS system file cache for sort I/O", NULL, NULL, FALSE); -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, +static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, @@ -18340,6 +18697,19 @@ static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks, "not take any locks at all.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_UINT(compressed_columns_zip_level, + srv_compressed_columns_zip_level, + PLUGIN_VAR_RQCMDARG, + "Compression level used for compressed columns. 0 is no compression" + ", 1 is fastest and 9 is best compression. Default is 6.", + NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); + +static MYSQL_SYSVAR_ULONG(compressed_columns_threshold, + srv_compressed_columns_threshold, + PLUGIN_VAR_RQCMDARG, + "Compress column data if its length exceeds this value. 
Default is 96", + NULL, NULL, 96, 1, ~0UL, 0); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -18537,6 +18907,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(tmpdir), + MYSQL_SYSVAR(compressed_columns_zip_level), + MYSQL_SYSVAR(compressed_columns_threshold), NULL }; @@ -18559,6 +18931,8 @@ mysql_declare_plugin(innobase) i_s_xtradb_read_view, i_s_xtradb_internal_hash_tables, i_s_xtradb_rseg, +i_s_xtradb_zip_dict, +i_s_xtradb_zip_dict_cols, i_s_innodb_trx, i_s_innodb_locks, i_s_innodb_lock_waits, diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index c9f9cfabc1f..609787bd6a1 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -287,6 +287,15 @@ class ha_innobase: public handler /** @} */ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); + + /** This function reads zip dict-related info from SYS_ZIP_DICT + and SYS_ZIP_DICT_COLS for all columns marked with + COLUMN_FORMAT_TYPE_COMPRESSED flag and updates + zip_dict_name / zip_dict_data for those which have associated + compression dictionaries. + */ + virtual void update_field_defs_with_zip_dict_info(); + private: /** Builds a 'template' to the prebuilt struct. @@ -665,3 +674,31 @@ innobase_build_index_translation( INNOBASE_SHARE* share); /*!< in/out: share structure where index translation table will be constructed in. */ + +/** This function checks if all the compression dictionaries referenced +in table->fields exist in SYS_ZIP_DICT InnoDB system table. 
+@return true if all referenced dictionaries exist */ +UNIV_INTERN +bool +innobase_check_zip_dicts( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + ulint* dict_ids, /*!< out: identified zip dict ids + (at least n_fields long) */ + trx_t* trx, /*!< in: transaction */ + const char** err_dict_name); /*!< out: the name of the + zip_dict which does not exist. */ + +/** This function creates compression dictionary references in +SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info +in table->fields and provided zip dict ids. */ +UNIV_INTERN +void +innobase_create_zip_dict_references( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + table_id_t ib_table_id, /*!< in: table ID in Innodb data + dictionary */ + ulint* zip_dict_ids, /*!< in: zip dict ids + (at least n_fields long) */ + trx_t* trx); /*!< in: transaction */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 1a39f70614d..291ed06a955 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -201,7 +201,10 @@ innobase_need_rebuild( /*==================*/ const Alter_inplace_info* ha_alter_info) { - if (ha_alter_info->handler_flags + Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = + ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); + + if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT @@ -1069,6 +1072,15 @@ innobase_col_to_mysql( field->reset(); if (field->type() == MYSQL_TYPE_VARCHAR) { + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + /* Skip compressed varchar column when + reporting an erroneous row + during index creation or table rebuild. */ + field->set_null(); + break; + } + /* This is a >= 5.0.3 type true VARCHAR. Store the length of the data to the first byte or the first two bytes of dest. 
*/ @@ -2328,7 +2340,8 @@ innobase_build_col_map_add( mem_heap_t* heap, dfield_t* dfield, const Field* field, - ulint comp) + ulint comp, + row_prebuilt_t* prebuilt) { if (field->is_real_null()) { dfield_set_null(dfield); @@ -2340,7 +2353,10 @@ innobase_build_col_map_add( byte* buf = static_cast(mem_heap_alloc(heap, size)); row_mysql_store_col_in_innobase_format( - dfield, buf, TRUE, field->ptr, size, comp); + dfield, buf, TRUE, field->ptr, size, comp, + field->column_format() == COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast(field->zip_dict_data.str), + field->zip_dict_data.length, prebuilt); } /** Construct the translation table for reordering, dropping or @@ -2365,7 +2381,8 @@ innobase_build_col_map( const dict_table_t* new_table, const dict_table_t* old_table, dtuple_t* add_cols, - mem_heap_t* heap) + mem_heap_t* heap, + row_prebuilt_t* prebuilt) { DBUG_ENTER("innobase_build_col_map"); DBUG_ASSERT(altered_table != table); @@ -2404,7 +2421,7 @@ innobase_build_col_map( innobase_build_col_map_add( heap, dtuple_get_nth_field(add_cols, i), altered_table->field[i], - dict_table_is_comp(new_table)); + dict_table_is_comp(new_table), prebuilt); found_col: i++; } @@ -2567,7 +2584,8 @@ prepare_inplace_alter_table_dict( ulint flags2, ulint fts_doc_id_col, bool add_fts_doc_id, - bool add_fts_doc_id_idx) + bool add_fts_doc_id_idx, + row_prebuilt_t* prebuilt) { bool dict_locked = false; ulint* add_key_nums; /* MySQL key numbers */ @@ -2578,6 +2596,7 @@ prepare_inplace_alter_table_dict( dberr_t error; ulint num_fts_index; ha_innobase_inplace_ctx*ctx; + ulint* zip_dict_ids = 0; DBUG_ENTER("prepare_inplace_alter_table_dict"); @@ -2712,6 +2731,18 @@ prepare_inplace_alter_table_dict( ctx->new_table->id); ulint n_cols; dtuple_t* add_cols; + const char* err_zip_dict_name = 0; + + zip_dict_ids = static_cast( + mem_heap_alloc(ctx->heap, + altered_table->s->fields * sizeof(ulint))); + + if (!innobase_check_zip_dicts(altered_table, zip_dict_ids, + ctx->trx, &err_zip_dict_name)) { + 
my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST, + MYF(0), err_zip_dict_name); + goto new_clustered_failed; + } if (innobase_check_foreigns( ha_alter_info, altered_table, old_table, @@ -2815,6 +2846,12 @@ prepare_inplace_alter_table_dict( } } + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + field_type |= DATA_COMPRESSED; + } + + if (dict_col_name_is_reserved(field->field_name)) { dict_mem_table_free(ctx->new_table); my_error(ER_WRONG_COLUMN_NAME, MYF(0), @@ -2894,7 +2931,7 @@ prepare_inplace_alter_table_dict( ctx->col_map = innobase_build_col_map( ha_alter_info, altered_table, old_table, ctx->new_table, user_table, - add_cols, ctx->heap); + add_cols, ctx->heap, prebuilt); ctx->add_cols = add_cols; } else { DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info)); @@ -3072,6 +3109,15 @@ op_ok: DBUG_ASSERT(error == DB_SUCCESS); + /* + Adding compression dictionary <-> compressed table column links + to the SYS_ZIP_DICT_COLS table. + */ + if (zip_dict_ids != 0) { + innobase_create_zip_dict_references(altered_table, + ctx->trx->table_id, zip_dict_ids, ctx->trx); + } + /* Commit the data dictionary transaction in order to release the table locks on the system tables. 
This means that if MySQL crashes while creating a new primary key inside @@ -3767,7 +3813,7 @@ err_exit: } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || (ha_alter_info->handler_flags + || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info))) { @@ -3893,7 +3939,7 @@ found_col: table_share->table_name.str, flags, flags2, fts_doc_col_no, add_fts_doc_id, - add_fts_doc_id_idx)); + add_fts_doc_id_idx, prebuilt)); } /** Alter the table structure in-place with operations @@ -3933,7 +3979,7 @@ ok_exit: DBUG_RETURN(false); } - if (ha_alter_info->handler_flags + if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info)) { goto ok_exit; diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index dfdad55ec3b..b351e464a1e 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -4050,6 +4050,8 @@ i_s_fts_config_fill( DBUG_RETURN(0); } + DEBUG_SYNC_C("i_s_fts_config_fille_check"); + fields = table->field; /* Prevent DDL to drop fts aux tables. */ diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc index 213e3c1aa53..91763784476 100644 --- a/storage/xtradb/handler/xtradb_i_s.cc +++ b/storage/xtradb/handler/xtradb_i_s.cc @@ -32,9 +32,11 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include "srv0start.h" /* for srv_was_started */ +#include /* btr_pcur_t */ #include /* btr_search_sys */ #include /* recv_sys */ #include +#include /* for ZIP_DICT_MAX_* constants */ /* for XTRADB_RSEG table */ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ @@ -130,6 +132,28 @@ field_store_string( return(ret); } +/** Auxiliary function to store (char*, len) value in MYSQL_TYPE_BLOB +field. 
+@return 0 on success */ +static +int +field_store_blob( + Field* field, /*!< in/out: target field for storage */ + const char* data, /*!< in: pointer to data, or NULL */ + uint data_len) /*!< in: data length */ +{ + int ret; + + if (data != NULL) { + ret = field->store(data, data_len, system_charset_info); + field->set_notnull(); + } else { + ret = 0; /* success */ + field->set_null(); + } + + return(ret); +} static int @@ -603,3 +627,329 @@ UNIV_INTERN struct st_mysql_plugin i_s_xtradb_rseg = STRUCT_FLD(__reserved1, NULL), STRUCT_FLD(flags, 0UL), }; + + +/************************************************************************/ +enum zip_dict_field_type +{ + zip_dict_field_id, + zip_dict_field_name, + zip_dict_field_zip_dict +}; + +static ST_FIELD_INFO xtradb_sys_zip_dict_fields_info[] = +{ + { STRUCT_FLD(field_name, "id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "name"), + STRUCT_FLD(field_length, ZIP_DICT_MAX_NAME_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "zip_dict"), + STRUCT_FLD(field_length, ZIP_DICT_MAX_DATA_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_BLOB), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + END_OF_ST_FIELD_INFO +}; + +/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT with information +collected by scanning SYS_ZIP_DICT table. 
+@return 0 on success */ +static +int +xtradb_i_s_dict_fill_sys_zip_dict( + THD* thd, /*!< in: thread */ + ulint id, /*!< in: dict ID */ + const char* name, /*!< in: dict name */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict"); + + Field** fields = table_to_fill->field; + + OK(field_store_ulint(fields[zip_dict_field_id], id)); + OK(field_store_string(fields[zip_dict_field_name], name)); + OK(field_store_blob(fields[zip_dict_field_zip_dict], data, + data_len)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} + +/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT table. +Loop through each record in SYS_ZIP_DICT, and extract the column +information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT table. +@return 0 on success */ +static +int +xtradb_i_s_sys_zip_dict_fill_table( + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("xtradb_i_s_sys_zip_dict_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT); + ulint zip_size = dict_table_zip_size(pcur.btr_cur.index->table); + + while (rec) { + const char* err_msg; + ulint id; + const char* name; + const char* data; + ulint data_len; + + /* Extract necessary information from a SYS_ZIP_DICT row */ + err_msg = dict_process_sys_zip_dict( + heap, zip_size, rec, &id, &name, &data, &data_len); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + 
xtradb_i_s_dict_fill_sys_zip_dict( + thd, id, name, data, data_len, + tables->table); + } else { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +static int i_s_xtradb_zip_dict_init(void* p) +{ + DBUG_ENTER("i_s_xtradb_zip_dict_init"); + + ST_SCHEMA_TABLE* schema = static_cast(p); + + schema->fields_info = xtradb_sys_zip_dict_fields_info; + schema->fill_table = xtradb_i_s_sys_zip_dict_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "XTRADB_ZIP_DICT"), + STRUCT_FLD(author, PLUGIN_AUTHOR), + STRUCT_FLD(descr, "InnoDB compression dictionaries information"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_xtradb_zip_dict_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, INNODB_VERSION_SHORT), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL), +}; + +enum zip_dict_cols_field_type +{ + zip_dict_cols_field_table_id, + zip_dict_cols_field_column_pos, + zip_dict_cols_field_dict_id +}; + +static ST_FIELD_INFO xtradb_sys_zip_dict_cols_fields_info[] = +{ + { STRUCT_FLD(field_name, "table_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "column_pos"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + 
STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "dict_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + END_OF_ST_FIELD_INFO +}; + +/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS with information +collected by scanning SYS_ZIP_DICT_COLS table. +@return 0 on success */ +static +int +xtradb_i_s_dict_fill_sys_zip_dict_cols( + THD* thd, /*!< in: thread */ + ulint table_id, /*!< in: table ID */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict ID */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict_cols"); + + Field** fields = table_to_fill->field; + + OK(field_store_ulint(fields[zip_dict_cols_field_table_id], + table_id)); + OK(field_store_ulint(fields[zip_dict_cols_field_column_pos], + column_pos)); + OK(field_store_ulint(fields[zip_dict_cols_field_dict_id], + dict_id)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} + +/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table. +Loop through each record in SYS_ZIP_DICT_COLS, and extract the column +information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table. 
+@return 0 on success */ +static +int +xtradb_i_s_sys_zip_dict_cols_fill_table( + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("xtradb_i_s_sys_zip_dict_cols_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT_COLS); + + while (rec) { + const char* err_msg; + ulint table_id; + ulint column_pos; + ulint dict_id; + + /* Extract necessary information from a SYS_ZIP_DICT_COLS + row */ + err_msg = dict_process_sys_zip_dict_cols( + heap, rec, &table_id, &column_pos, &dict_id); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + xtradb_i_s_dict_fill_sys_zip_dict_cols( + thd, table_id, column_pos, dict_id, + tables->table); + } else { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +static int i_s_xtradb_zip_dict_cols_init(void* p) +{ + DBUG_ENTER("i_s_xtradb_zip_dict_cols_init"); + + ST_SCHEMA_TABLE* schema = static_cast(p); + + schema->fields_info = xtradb_sys_zip_dict_cols_fields_info; + schema->fill_table = xtradb_i_s_sys_zip_dict_cols_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict_cols = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "XTRADB_ZIP_DICT_COLS"), + STRUCT_FLD(author, 
PLUGIN_AUTHOR), + STRUCT_FLD(descr, "InnoDB compressed columns information"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_xtradb_zip_dict_cols_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, INNODB_VERSION_SHORT), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL), +}; diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h index 2f7552c565a..905d84587af 100644 --- a/storage/xtradb/handler/xtradb_i_s.h +++ b/storage/xtradb/handler/xtradb_i_s.h @@ -22,5 +22,7 @@ this program; if not, write to the Free Software Foundation, Inc., extern struct st_mysql_plugin i_s_xtradb_read_view; extern struct st_mysql_plugin i_s_xtradb_internal_hash_tables; extern struct st_mysql_plugin i_s_xtradb_rseg; +extern struct st_mysql_plugin i_s_xtradb_zip_dict; +extern struct st_mysql_plugin i_s_xtradb_zip_dict_cols; #endif /* XTRADB_I_S_H */ diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h index 111664b0b52..f269c266efb 100644 --- a/storage/xtradb/include/data0type.h +++ b/storage/xtradb/include/data0type.h @@ -170,6 +170,9 @@ be less than 256 */ type when the column is true VARCHAR where MySQL uses 2 bytes to store the data len; for shorter VARCHARs MySQL uses only 1 byte */ +#define DATA_COMPRESSED 16384 /* this is ORed to the precise data + type when the column has COLUMN_FORMAT = + COMPRESSED attribute*/ /*-------------------------------------------*/ /* This many bytes we need to store the type information affecting the @@ -500,6 +503,17 @@ dtype_print( /*========*/ const dtype_t* type); /*!< in: type */ +/** +Calculates the number of extra bytes needed for compression header +depending on precise column type. 
+@reval 0 if prtype does not include DATA_COMPRESSED flag +@reval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag +*/ +UNIV_INLINE +ulint +prtype_get_compression_extra( + ulint prtype); /*!< in: precise type */ + /* Structure for an SQL data type. If you add fields to this structure, be sure to initialize them everywhere. This structure is initialized in the following functions: diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic index d489bef89a8..29dc480a19c 100644 --- a/storage/xtradb/include/data0type.ic +++ b/storage/xtradb/include/data0type.ic @@ -26,6 +26,7 @@ Created 1/16/1996 Heikki Tuuri #include /* strlen() */ #include "mach0data.h" +#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */ #ifndef UNIV_HOTBACKUP # include "ha_prototypes.h" @@ -709,3 +710,18 @@ dtype_get_sql_null_size( 0, 0)); #endif /* !UNIV_HOTBACKUP */ } + +/** +Calculates the number of extra bytes needed for compression header +depending on precise column type. +@reval 0 if prtype does not include DATA_COMPRESSED flag +@reval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag +*/ +UNIV_INLINE +ulint +prtype_get_compression_extra( + ulint prtype) /*!< in: precise type */ +{ + return (prtype & DATA_COMPRESSED) != 0 ? 
+ ZIP_COLUMN_HEADER_LENGTH : 0; +} diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 477e1150f43..d5bee886cbf 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -324,6 +324,38 @@ enum dict_fld_sys_datafiles_enum { DICT_FLD__SYS_DATAFILES__PATH = 3, DICT_NUM_FIELDS__SYS_DATAFILES = 4 }; +/* The columns in SYS_DICT */ +enum dict_col_sys_zip_dict_enum { + DICT_COL__SYS_ZIP_DICT__ID = 0, + DICT_COL__SYS_ZIP_DICT__NAME = 1, + DICT_COL__SYS_ZIP_DICT__DATA = 2, + DICT_NUM_COLS__SYS_ZIP_DICT = 3 +}; +/* The field numbers in the SYS_DICT clustered index */ +enum dict_fld_sys_zip_dict_enum { + DICT_FLD__SYS_ZIP_DICT__ID = 0, + DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID = 1, + DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR = 2, + DICT_FLD__SYS_ZIP_DICT__NAME = 3, + DICT_FLD__SYS_ZIP_DICT__DATA = 4, + DICT_NUM_FIELDS__SYS_ZIP_DICT = 5 +}; +/* The columns in SYS_DICT_COLS */ +enum dict_col_sys_zip_dict_cols_enum { + DICT_COL__SYS_ZIP_DICT_COLS__TABLE_ID = 0, + DICT_COL__SYS_ZIP_DICT_COLS__COLUMN_POS = 1, + DICT_COL__SYS_ZIP_DICT_COLS__DICT_ID = 2, + DICT_NUM_COLS__SYS_ZIP_DICT_COLS = 3 +}; +/* The field numbers in the SYS_DICT_COLS clustered index */ +enum dict_fld_sys_zip_dict_cols_enum { + DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID = 0, + DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS = 1, + DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID = 2, + DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR = 3, + DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID = 4, + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS = 5 +}; /* A number of the columns above occur in multiple tables. These are the length of thos fields. 
*/ diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index 6146917469a..33877b67834 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -152,6 +152,19 @@ UNIV_INTERN dberr_t dict_create_or_check_sys_tablespace(void); /*=====================================*/ + +#define ZIP_DICT_MAX_NAME_LENGTH 64 +/* Max window size (2^15) minus 262 */ +#define ZIP_DICT_MAX_DATA_LENGTH 32506 + +/** Creates the zip_dict system table inside InnoDB +at server bootstrap or server start if it is not found or is +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_sys_zip_dict(void); + /********************************************************************//** Add a single tablespace definition to the data dictionary tables in the database. @@ -167,6 +180,84 @@ dict_create_add_tablespace_to_dictionary( trx_t* trx, /*!< in: transaction */ bool commit); /*!< in: if true then commit the transaction */ + +/** Add a single compression dictionary definition to the SYS_ZIP_DICT +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get a single compression dictionary id for the given +(table id, column pos) pair. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get compression dictionary id for the given name. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_name( + const char* dict_name, /*!< in: dict name */ + ulint dict_name_len, /*!< in: dict name length */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory for name and data on success. +Must be freed with mem_free(). +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_info_by_id( + ulint dict_id, /*!< in: dict id */ + char** name, /*!< out: dict name */ + ulint* name_len, /*!< out: dict name length */ + char** data, /*!< out: dict data */ + ulint* data_len, /*!< out: dict data length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Remove a single compression dictionary from the data dictionary +tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Remove all compression dictionary references for the given table ID from +the data dictionary tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict_references_for_table( + ulint table_id, /*!< in: table id */ + trx_t* trx); /*!< in/out: transaction */ + /********************************************************************//** Add a foreign key definition to the data dictionary tables. 
@return error code or DB_SUCCESS */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index f539f62960b..870b142ba32 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1845,6 +1845,52 @@ dict_table_set_corrupt_by_space( ulint space_id, ibool need_mutex); +/** Insert a records into SYS_ZIP_DICT. +@retval DB_SUCCESS if OK +@retval dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_create_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len, /*!< in: zip_dict name length*/ + const char* data, /*!< in: zip_dict data */ + ulint data_len); /*!< in: zip_dict data length */ + +/** Get single compression dictionary id for the given +(table id, column pos) pair. +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_id_by_key( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id); /*!< out: zip_dict id */ + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory in name->str and data->str on success. +Must be freed with mem_free(). +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_info_by_id( + ulint dict_id, /*!< in: table name */ + char** name, /*!< out: dictionary name */ + ulint* name_len, /*!< out: dictionary name length*/ + char** data, /*!< out: dictionary data */ + ulint* data_len); /*!< out: dictionary data length*/ + +/** Delete a record in SYS_ZIP_DICT with the given name. 
+@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found +@retval DB_ROW_IS_REFERENCED if in use */ +UNIV_INTERN +dberr_t +dict_drop_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len); /*!< in: zip_dict name length*/ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h index dcbc3de8e94..85e3e565637 100644 --- a/storage/xtradb/include/dict0load.h +++ b/storage/xtradb/include/dict0load.h @@ -44,6 +44,8 @@ enum dict_system_id_t { SYS_FOREIGN_COLS, SYS_TABLESPACES, SYS_DATAFILES, + SYS_ZIP_DICT, + SYS_ZIP_DICT_COLS, /* This must be last item. Defines the number of system tables. */ SYS_NUM_SYSTEM_TABLES @@ -386,6 +388,33 @@ dict_process_sys_datafiles( const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ ulint* space, /*!< out: pace id */ const char** path); /*!< out: datafile path */ + +/** This function parses a SYS_ZIP_DICT record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict( + mem_heap_t* heap, /*!< in/out: heap memory */ + ulint zip_size, /*!< in: nonzero=compressed BLOB page size */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* id, /*!< out: dict id */ + const char** name, /*!< out: dict name */ + const char** data, /*!< out: dict data */ + ulint* data_len); /*!< out: dict data length */ + +/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary +information from the record and returns to caller. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict_cols( + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* table_id, /*!< out: table id */ + ulint* column_pos, /*!< out: column position */ + ulint* dict_id); /*!< out: dict id */ + /********************************************************************//** Get the filepath for a spaceid from SYS_DATAFILES. This function provides a temporary heap which is used for the table lookup, but not for the path. diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index 87b5787d416..3e2f359bbeb 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -375,6 +375,7 @@ extern bool fts_need_sync; /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; +extern char* fts_internal_tbl_name2; #define fts_que_graph_free(graph) \ do { \ @@ -823,6 +824,15 @@ void fts_drop_orphaned_tables(void); /*==========================*/ +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len); + /******************************************************************//** Since we do a horizontal split on the index table, we need to drop all the split tables. diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h index e36f836e0be..44ff5e6757e 100644 --- a/storage/xtradb/include/os0thread.h +++ b/storage/xtradb/include/os0thread.h @@ -131,14 +131,27 @@ os_thread_create_func( os_thread_id_t* thread_id); /*!< out: id of the created thread, or NULL */ +/** +Waits until the specified thread completes and joins it. Its return value is +ignored. 
+ +@param thread thread to join */ +UNIV_INTERN +void +os_thread_join( + os_thread_t thread); + /*****************************************************************//** Exits the current thread. */ UNIV_INTERN void os_thread_exit( /*===========*/ - void* exit_value) /*!< in: exit value; in Windows this void* + void* exit_value, /*!< in: exit value; in Windows this void* is cast as a DWORD */ + bool detach = true) /*!< in: if true, the thread will be detached + right before exiting. If false, another thread + is responsible for joining this thread. */ UNIV_COLD MY_ATTRIBUTE((noreturn)); /*****************************************************************//** Returns the thread identifier of current thread. diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h index f8133f77466..5da96066f88 100644 --- a/storage/xtradb/include/rem0types.h +++ b/storage/xtradb/include/rem0types.h @@ -71,4 +71,7 @@ enum rec_format_enum { }; typedef enum rec_format_enum rec_format_t; +/** Compressed field header size in bytes */ +#define ZIP_COLUMN_HEADER_LENGTH 2 + #endif diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index fc1846b76f3..27d3adfc7f0 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -41,6 +41,9 @@ struct SysIndexCallback; extern ibool row_rollback_on_timeout; +extern uint srv_compressed_columns_zip_level; +extern ulong srv_compressed_columns_threshold; + struct row_prebuilt_t; /*******************************************************************//** @@ -51,6 +54,49 @@ row_mysql_prebuilt_free_blob_heap( /*==============================*/ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a ha_innobase:: table handle */ + +/** Frees the compress heap in prebuilt when no longer needed. 
*/ +UNIV_INTERN +void +row_mysql_prebuilt_free_compress_heap( + row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a + ha_innobase:: table handle */ + +/** Uncompress blob/text/varchar column using zlib +@return pointer to the uncompressed data */ +const byte* +row_decompress_column( + const byte* data, /*!< in: data in innodb(compressed) format */ + ulint *len, /*!< in: data length; out: length of + decompressed data*/ + const byte* dict_data, + /*!< in: optional dictionary data used for + decompression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt); + /*!< in: use prebuilt->compress_heap only + here*/ + +/** Compress blob/text/varchar column using zlib +@return pointer to the compressed data */ +byte* +row_compress_column( + const byte* data, /*!< in: data in mysql(uncompressed) + format */ + ulint *len, /*!< in: data length; out: length of + compressed data*/ + ulint lenlen, /*!< in: bytes used to store the length of + data */ + const byte* dict_data, + /*!< in: optional dictionary data used for + compression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt); + /*!< in: use prebuilt->compress_heap only + here*/ + /*******************************************************************//** Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row format. @@ -89,10 +135,21 @@ row_mysql_store_blob_ref( to 4 bytes */ const void* data, /*!< in: BLOB data; if the value to store is SQL NULL this should be NULL pointer */ - ulint len); /*!< in: BLOB length; if the value to store + ulint len, /*!< in: BLOB length; if the value to store is SQL NULL this should be 0; remember also to set the NULL bit in the MySQL record header! 
*/ + bool need_decompression, + /*!< in: if the data need to be compressed*/ + const byte* dict_data, + /*!< in: optional compression dictionary + data */ + ulint dict_data_len, + /*!< in: optional compression dictionary data + length */ + row_prebuilt_t* prebuilt); + /*compress_heap only + here */ /*******************************************************************//** Reads a reference to a BLOB in the MySQL format. @return pointer to BLOB data */ @@ -103,8 +160,17 @@ row_mysql_read_blob_ref( ulint* len, /*!< out: BLOB length */ const byte* ref, /*!< in: BLOB reference in the MySQL format */ - ulint col_len); /*!< in: BLOB reference length + ulint col_len, /*!< in: BLOB reference length (not BLOB length) */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap + only here */ /**************************************************************//** Pad a column with spaces. */ UNIV_INTERN @@ -152,7 +218,16 @@ row_mysql_store_col_in_innobase_format( necessarily the length of the actual payload data; if the column is a true VARCHAR then this is irrelevant */ - ulint comp); /*!< in: nonzero=compact format */ + ulint comp, /*!< in: nonzero=compact format */ + bool need_compression, + /*!< in: if the data need to be + compressed */ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap + only here */ /****************************************************************//** Handles user errors and lock waits detected by the database engine. 
@return true if it was a lock wait and we should continue running the @@ -643,6 +718,8 @@ struct mysql_row_templ_t { ulint is_unsigned; /*!< if a column type is an integer type and this field is != 0, then it is an unsigned integer type */ + bool compressed; /*!< if column format is compressed */ + LEX_CSTRING zip_dict_data; /*!< associated compression dictionary */ }; #define MYSQL_FETCH_CACHE_SIZE 8 @@ -839,6 +916,8 @@ struct row_prebuilt_t { in fetch_cache */ mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied to this heap */ + mem_heap_t* compress_heap; /*!< memory heap used to compress + /decompress blob column*/ mem_heap_t* old_vers_heap; /*!< memory heap where a previous version is built in consistent read */ bool in_fts_query; /*!< Whether we are in a FTS query */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 692d339608a..09f305091c2 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -487,6 +487,9 @@ extern ibool srv_priority_boost; extern ulint srv_truncated_status_writes; extern ulint srv_available_undo_logs; +extern ulint srv_column_compressed; +extern ulint srv_column_decompressed; + extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; @@ -1079,6 +1082,8 @@ struct export_var_t{ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - purged view's min trx_id */ #endif /* UNIV_DEBUG */ + ulint innodb_column_compressed; /*!< srv_column_compressed */ + ulint innodb_column_decompressed; /*!< srv_column_decompressed */ }; /** Thread slot in the thread table. 
*/ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 4d64e3249c0..296c04d9f62 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 78.1 +#define PERCONA_INNODB_VERSION 79.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 0768bb6bb00..7784e8538b7 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -975,6 +975,7 @@ log_init(void) log_sys->next_checkpoint_no = 0; log_sys->last_checkpoint_lsn = log_sys->lsn; + log_sys->next_checkpoint_lsn = log_sys->lsn; log_sys->n_pending_checkpoint_writes = 0; @@ -1891,6 +1892,7 @@ log_complete_checkpoint(void) log_sys->next_checkpoint_no++; + ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn); log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, log_sys->lsn - log_sys->last_checkpoint_lsn); @@ -1978,11 +1980,17 @@ log_group_checkpoint( ulint i; ut_ad(!srv_read_only_mode); + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE); ut_ad(mutex_own(&(log_sys->mutex))); ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); buf = group->checkpoint_buf; +#ifdef UNIV_DEBUG + lsn_t old_next_checkpoint_lsn + = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); + ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn); +#endif /* UNIV_DEBUG */ mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); @@ -2242,6 +2250,7 @@ log_checkpoint( return(FALSE); } + ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn); log_sys->next_checkpoint_lsn = oldest_lsn; #ifdef UNIV_DEBUG @@ -3490,13 +3499,15 @@ loop: before proceeding further. 
*/ srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; count = 0; - while (buf_page_cleaner_is_active) { - ++count; - os_thread_sleep(100000); - if (srv_print_verbose_log && count > 600) { + while (buf_page_cleaner_is_active || buf_lru_manager_is_active) { + if (srv_print_verbose_log && count == 0) { ib_logf(IB_LOG_LEVEL_INFO, "Waiting for page_cleaner to " "finish flushing of buffer pool"); + } + ++count; + os_thread_sleep(100000); + if (count > 600) { count = 0; } } @@ -3664,6 +3675,7 @@ loop: ut_a(freed); ut_a(lsn == log_sys->lsn); + ut_ad(lsn == log_sys->last_checkpoint_lsn); if (lsn < srv_start_lsn) { ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index d80cb2ad447..46f544178d2 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -441,6 +441,7 @@ log_online_track_missing_on_startup( current server startup */ { ut_ad(last_tracked_lsn != tracking_start_lsn); + ut_ad(srv_track_changed_pages); ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF ", but the last checkpoint LSN is " LSN_PF ". 
This might be " @@ -623,6 +624,8 @@ log_online_read_init(void) compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0); compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0); + ut_ad(srv_track_changed_pages); + log_bmp_sys = static_cast (ut_malloc(sizeof(*log_bmp_sys))); log_bmp_sys->read_buf_ptr = static_cast @@ -1097,10 +1100,15 @@ log_online_write_bitmap_page( { ibool success; + ut_ad(srv_track_changed_pages); ut_ad(mutex_own(&log_bmp_sys->mutex)); /* Simulate a write error */ - DBUG_EXECUTE_IF("bitmap_page_write_error", return FALSE;); + DBUG_EXECUTE_IF("bitmap_page_write_error", + ib_logf(IB_LOG_LEVEL_ERROR, + "simulating bitmap write error in " + "log_online_write_bitmap_page"); + return FALSE;); success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, @@ -1190,7 +1198,9 @@ log_online_write_bitmap(void) rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); DBUG_EXECUTE_IF("bitmap_page_2_write_error", - DBUG_SET("+d,bitmap_page_write_error");); + ut_ad(bmp_tree_node); /* 2nd page must exist */ + DBUG_SET("+d,bitmap_page_write_error"); + DBUG_SET("-d,bitmap_page_2_write_error");); } rbt_reset(log_bmp_sys->modified_pages); @@ -1211,15 +1221,11 @@ log_online_follow_redo_log(void) log_group_t* group; ibool result; - mutex_enter(&log_bmp_sys->mutex); - - if (!srv_track_changed_pages) { - mutex_exit(&log_bmp_sys->mutex); - return FALSE; - } - + ut_ad(srv_track_changed_pages); ut_ad(!srv_read_only_mode); + mutex_enter(&log_bmp_sys->mutex); + /* Grab the LSN of the last checkpoint, we will parse up to it */ mutex_enter(&(log_sys->mutex)); log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn; @@ -1562,9 +1568,12 @@ log_online_diagnose_bitmap_eof( /* It's a "Warning" here because it's not a fatal error for the whole server */ ib_logf(IB_LOG_LEVEL_WARN, - "changed page bitmap file \'%s\' does not " - "contain a complete run at the end.", - bitmap_file->name); + "changed page bitmap file \'%s\', size " + UINT64PF " 
bytes, does not " + "contain a complete run at the next read " + "offset " UINT64PF, + bitmap_file->name, bitmap_file->size, + bitmap_file->offset); return FALSE; } } diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc index df68aab8a18..206434dc5ab 100644 --- a/storage/xtradb/mach/mach0data.cc +++ b/storage/xtradb/mach/mach0data.cc @@ -56,7 +56,18 @@ mach_parse_compressed( *val = flag; return(ptr + 1); - } else if (flag < 0xC0UL) { + } + + /* Workaround GCC bug + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673: + the compiler moves mach_read_from_4 right to the beginning of the + function, causing and out-of-bounds read if we are reading a short + integer close to the end of buffer. */ +#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__) + asm volatile("": : :"memory"); +#endif + + if (flag < 0xC0UL) { if (end_ptr < ptr + 2) { return(NULL); } diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc index 1d417f9823c..93f45e060f8 100644 --- a/storage/xtradb/os/os0thread.cc +++ b/storage/xtradb/os/os0thread.cc @@ -210,14 +210,33 @@ os_thread_create_func( #endif } +/** +Waits until the specified thread completes and joins it. Its return value is +ignored. + +@param thread thread to join */ +UNIV_INTERN +void +os_thread_join( + os_thread_t thread) +{ + int ret MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL); + + /* Waiting on already-quit threads is allowed */ + ut_ad(ret == 0 || ret == ESRCH); +} + /*****************************************************************//** Exits the current thread. */ UNIV_INTERN void os_thread_exit( /*===========*/ - void* exit_value) /*!< in: exit value; in Windows this void* + void* exit_value, /*!< in: exit value; in Windows this void* is cast as a DWORD */ + bool detach) /*!< in: if true, the thread will be detached + right before exiting. If false, another thread + is responsible for joining this thread. 
*/ { #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Thread exits, id %lu\n", @@ -233,7 +252,8 @@ os_thread_exit( #ifdef __WIN__ ExitThread((DWORD) exit_value); #else - pthread_detach(pthread_self()); + if (detach) + pthread_detach(pthread_self()); pthread_exit(exit_value); #endif } diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc index a95e9c23613..09cd810cd7b 100644 --- a/storage/xtradb/rem/rem0rec.cc +++ b/storage/xtradb/rem/rem0rec.cc @@ -320,7 +320,8 @@ rec_init_offsets_comp_ordinary( stored in one byte for 0..127. The length will be encoded in two bytes when it is 128 or more, or when the field is stored externally. */ - if (UNIV_UNLIKELY(col->len > 255) + if (UNIV_UNLIKELY(col->len > 255 - + prtype_get_compression_extra(col->prtype)) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -841,8 +842,12 @@ rec_get_converted_size_comp_prefix_low( continue; } - ut_ad(len <= col->len || col->mtype == DATA_BLOB - || (col->len == 0 && col->mtype == DATA_VARCHAR)); + ut_ad(len <= col->len || col->mtype == DATA_BLOB || + ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY + || col->mtype == DATA_VARMYSQL) + && (col->len == 0 + || len <= col->len + + prtype_get_compression_extra(col->prtype)))); fixed_len = field->fixed_len; if (temp && fixed_len @@ -874,7 +879,9 @@ rec_get_converted_size_comp_prefix_low( ut_ad(col->len >= 256 || col->mtype == DATA_BLOB); extra_size += 2; } else if (len < 128 - || (col->len < 256 && col->mtype != DATA_BLOB)) { + || (col->len < 256 - + prtype_get_compression_extra(col->prtype) + && col->mtype != DATA_BLOB)) { extra_size++; } else { /* For variable-length columns, we look up the @@ -1269,12 +1276,16 @@ rec_convert_dtuple_to_rec_comp( *lens-- = (byte) (len >> 8) | 0xc0; *lens-- = (byte) len; } else { - ut_ad(len <= dtype_get_len(type) + ut_ad(len <= dtype_get_len(type) + + prtype_get_compression_extra( + dtype_get_prtype(type)) || dtype_get_mtype(type) == DATA_BLOB || !strcmp(index->name, 
FTS_INDEX_TABLE_IND_NAME)); if (len < 128 - || (dtype_get_len(type) < 256 + || (dtype_get_len(type) < 256 - + prtype_get_compression_extra( + dtype_get_prtype(type)) && dtype_get_mtype(type) != DATA_BLOB)) { *lens-- = (byte) len; diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index 6fac6c0d317..97f2b8d4b5d 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -960,7 +960,7 @@ fts_parallel_merge( CloseHandle(psort_info->thread_hdl); #endif /*__WIN__ */ - os_thread_exit(NULL); + os_thread_exit(NULL, false); OS_THREAD_DUMMY_RETURN; } diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index a6751b208f7..54183759e8d 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -613,7 +613,7 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 4 + old_pk_size; + mrec_size = 6 + old_pk_size; /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, @@ -643,8 +643,8 @@ row_log_table_delete( *b++ = static_cast(old_pk_extra_size); /* Log the size of external prefix we saved */ - mach_write_to_2(b, ext_size); - b += 2; + mach_write_to_4(b, ext_size); + b += 4; rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, @@ -2268,14 +2268,14 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */ - if (mrec + 4 >= mrec_end) { + /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ + if (mrec + 6 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_2(mrec); - mrec += 2; + ext_size = mach_read_from_4(mrec); + mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. 
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index feb18c82ab6..3f50504bec8 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -523,7 +523,12 @@ row_merge_buf_add( dfield_set_len(field, len); } - ut_ad(len <= col->len || col->mtype == DATA_BLOB); + ut_ad(len <= col->len || col->mtype == DATA_BLOB || + ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY + || col->mtype == DATA_VARMYSQL) + && (col->len == 0 + || len <= col->len + + prtype_get_compression_extra(col->prtype)))); fixed_len = ifield->fixed_len; if (fixed_len && !dict_table_is_comp(index->table) @@ -552,7 +557,9 @@ row_merge_buf_add( } else if (dfield_is_ext(field)) { extra_size += 2; } else if (len < 128 - || (col->len < 256 && col->mtype != DATA_BLOB)) { + || (col->len < 256 - + prtype_get_compression_extra(col->prtype) + && col->mtype != DATA_BLOB)) { extra_size++; } else { /* For variable-length columns, we look up the @@ -3780,6 +3787,13 @@ wait_again: " exited when creating FTS" " index '%s'", indexes[i]->name); + } else { + for (j = 0; j < FTS_NUM_AUX_INDEX; + j++) { + + os_thread_join(merge_info[j] + .thread_hdl); + } } } else { /* This cannot report duplicates; an diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 466ff113127..d54ac222137 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -65,11 +65,54 @@ Created 9/17/2000 Heikki Tuuri #include "m_string.h" #include "my_sys.h" #include "ha_prototypes.h" +#include "zlib.h" #include /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; +/** +Z_NO_COMPRESSION = 0 +Z_BEST_SPEED = 1 +Z_BEST_COMPRESSION = 9 +Z_DEFAULT_COMPRESSION = -1 +Compression level to be used by zlib for compressed-blob columns. +Settable by user. 
+*/ +UNIV_INTERN uint srv_compressed_columns_zip_level = DEFAULT_COMPRESSION_LEVEL; +/** +(Z_FILTERED | Z_HUFFMAN_ONLY | Z_RLE | Z_FIXED | Z_DEFAULT_STRATEGY) + +The strategy parameter is used to tune the compression algorithm. Use the +value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a +filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only +(no string match), or Z_RLE to limit match distances to one +(run-length encoding). Filtered data consists mostly of small values with a +somewhat random distribution. In this case, the compression algorithm is +tuned to compress them better. +The effect of Z_FILTERED is to force more Huffman coding and less string +matching; it is somewhat intermediate between Z_DEFAULT_STRATEGY and +Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as Z_HUFFMAN_ONLY, +but give better compression for PNG image data. The strategy parameter only +affects the compression ratio but not the correctness of the compressed +output even if it is not set appropriately. Z_FIXED prevents the use of +dynamic Huffman codes, allowing for a simpler decoder for special +applications. +*/ +const uint srv_compressed_columns_zlib_strategy = Z_DEFAULT_STRATEGY; +/** Compress the column if the data length exceeds this value. */ +UNIV_INTERN ulong srv_compressed_columns_threshold = 96; +/** +Determine if zlib needs to compute adler32 value for the compressed data. +This variables is similar to page_zip_zlib_wrap, but only used by +compressed blob columns. +*/ +const bool srv_compressed_columns_zlib_wrap = true; +/** +Determine if zlib will use custom memory allocation functions based on +InnoDB memory heap routines (mem_heap_t*). +*/ +const bool srv_compressed_columns_zlib_use_heap = false; /** Chain node of the list of tables to drop in the background. 
*/ struct row_mysql_drop_t{ char* table_name; /*!< table name */ @@ -173,6 +216,17 @@ row_mysql_prebuilt_free_blob_heap( prebuilt->blob_heap = NULL; } +/** Frees the compress heap in prebuilt when no longer needed. */ +UNIV_INTERN +void +row_mysql_prebuilt_free_compress_heap( + row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a + ha_innobase:: table handle */ +{ + mem_heap_free(prebuilt->compress_heap); + prebuilt->compress_heap = NULL; +} + /*******************************************************************//** Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row format. @@ -229,6 +283,425 @@ row_mysql_read_true_varchar( return(field + 1); } +/** + Compressed BLOB header format: + --------------------------------------------------------------- + | reserved | wrap | algorithm | len-len | compressed | unused | + | [1] | [1] | [5] | [3] | [1] | [5] | + --------------------------------------------------------------- + | 0 0 | 1 1 | 2 6 | 7 9 | 10 10 | 11 15 | + --------------------------------------------------------------- + * 'reserved' bit is planned to be used in future versions of the BLOB + header. In this version it must always be + 'default_zip_column_reserved_value' (0). + * 'wrap' identifies if compression algorithm calculated a checksum + (adler32 in case of zlib) and appended it to the compressed data. + * 'algorithm' identifies which algoritm was used to compress this BLOB. + Currently, the only value 'default_zip_column_algorithm_value' (0) is + supported. + * 'len-len' field identifies the length of the column length data portion + followed by this header (see below). + * If 'compressed' bit is set to 1, then this header is immediately followed + by 1..8 bytes (depending on the value of 'len-len' bitfield) which + determine original (uncompressed) block size. These 'len-len' bytes are + followed by compressed representation of the original data. 
+ * If 'compressed' bit is set to 0, every other bitfield ('wrap', + 'algorithm' and 'le-len') must be ignored. In this case the header is + immediately followed by uncompressed (original) data. +*/ + +/** + Currently the only supported value for the 'reserved' field is + false (0). +*/ +static const bool default_zip_column_reserved_value = false; + +/** + Currently the only supported value for the 'algorithm' field is 0, which + means 'zlib'. +*/ +static const uint default_zip_column_algorithm_value = 0; + +static const size_t zip_column_prefix_max_length = + ZIP_COLUMN_HEADER_LENGTH + 8; +static const size_t zip_column_header_length = ZIP_COLUMN_HEADER_LENGTH; + +/* 'reserved', bit 0 */ +static const uint zip_column_reserved = 0; +/* 0000 0000 0000 0001 */ +static const uint zip_column_reserved_mask = 0x0001; + +/* 'wrap', bit 1 */ +static const uint zip_column_wrap = 1; +/* 0000 0000 0000 0010 */ +static const uint zip_column_wrap_mask = 0x0002; + +/* 'algorithm', bit 2,3,4,5,6 */ +static const uint zip_column_algorithm = 2; +/* 0000 0000 0111 1100 */ +static const uint zip_column_algorithm_mask = 0x007C; + +/* 'len-len', bit 7,8,9 */ +static const uint zip_column_data_length = 7; +/* 0000 0011 1000 0000 */ +static const uint zip_column_data_length_mask = 0x0380; + +/* 'compressed', bit 10 */ +static const uint zip_column_compressed = 10; +/* 0000 0100 0000 0000 */ +static const uint zip_column_compressed_mask = 0x0400; + +/** Updates compressed block header with the given components */ +static void +column_set_compress_header( + byte* data, + bool compressed, + ulint lenlen, + uint alg, + bool wrap, + bool reserved) +{ + ulint header = 0; + header |= (compressed << zip_column_compressed); + header |= (lenlen << zip_column_data_length); + header |= (alg << zip_column_algorithm); + header |= (wrap << zip_column_wrap); + header |= (reserved << zip_column_reserved); + mach_write_to_2(data, header); +} + +/** Parse compressed block header into components */ +static 
void +column_get_compress_header( + const byte* data, + bool* compressed, + ulint* lenlen, + uint* alg, + bool* wrap, + bool* reserved +) +{ + ulint header = mach_read_from_2(data); + *compressed = ((header & zip_column_compressed_mask) >> + zip_column_compressed); + *lenlen = ((header & zip_column_data_length_mask) >> + zip_column_data_length); + *alg = ((header & zip_column_algorithm_mask) >> + zip_column_algorithm); + *wrap = ((header & zip_column_wrap_mask) >> + zip_column_wrap); + *reserved = ((header & zip_column_reserved_mask) >> + zip_column_reserved); +} + +/** Allocate memory for zlib. */ +static +void* +column_zip_zalloc( + void* opaque, /*!< in/out: memory heap */ + uInt items, /*!< in: number of items to allocate */ + uInt size) /*!< in: size of an item in bytes */ +{ + return(mem_heap_zalloc(static_cast(opaque), + items * size)); +} + +/** Deallocate memory for zlib. */ +static +void +column_zip_free( + void* opaque MY_ATTRIBUTE((unused)), /*!< in: memory heap */ + void* address MY_ATTRIBUTE((unused))) /*!< in: object to free */ +{ +} + +/** Configure the zlib allocator to use the given memory heap. 
*/ +UNIV_INTERN +void +column_zip_set_alloc( + void* stream, /*!< in/out: zlib stream */ + mem_heap_t* heap) /*!< in: memory heap to use */ +{ + z_stream* strm = static_cast(stream); + + if (srv_compressed_columns_zlib_use_heap) { + strm->zalloc = column_zip_zalloc; + strm->zfree = column_zip_free; + strm->opaque = heap; + } else { + strm->zalloc = (alloc_func)0; + strm->zfree = (free_func)0; + strm->opaque = (voidpf)0; + } +} + +/** Compress blob/text/varchar column using zlib +@return pointer to the compressed data */ +byte* +row_compress_column( + const byte* data, /*!< in: data in mysql(uncompressed) + format */ + ulint *len, /*!< in: data length; out: length of + compressed data*/ + ulint lenlen, /*!< in: bytes used to store the length of + data */ + const byte* dict_data, + /*!< in: optional dictionary data used for + compression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap only + here*/ +{ + int err = 0; + ulint comp_len = *len; + ulint buf_len = *len + zip_column_prefix_max_length; + byte* buf; + byte* ptr; + z_stream c_stream; + bool wrap = srv_compressed_columns_zlib_wrap; + + int window_bits = wrap ? 
MAX_WBITS : -MAX_WBITS; + + if (!prebuilt->compress_heap) { + prebuilt->compress_heap = + mem_heap_create(max(UNIV_PAGE_SIZE, buf_len)); + } + + buf = static_cast(mem_heap_zalloc( + prebuilt->compress_heap,buf_len)); + + if (*len < srv_compressed_columns_threshold || + srv_compressed_columns_zip_level == Z_NO_COMPRESSION) + goto do_not_compress; + + ptr = buf + zip_column_header_length + lenlen; + + /*init deflate object*/ + c_stream.next_in = const_cast(data); + c_stream.avail_in = *len; + c_stream.next_out = ptr; + c_stream.avail_out = comp_len; + + column_zip_set_alloc(&c_stream, prebuilt->compress_heap); + + err = deflateInit2(&c_stream, srv_compressed_columns_zip_level, + Z_DEFLATED, window_bits, MAX_MEM_LEVEL, + srv_compressed_columns_zlib_strategy); + ut_a(err == Z_OK); + + if (dict_data != 0 && dict_data_len != 0) { + err = deflateSetDictionary(&c_stream, dict_data, + dict_data_len); + ut_a(err == Z_OK); + } + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&c_stream); + if (err == Z_OK) + err = Z_BUF_ERROR; + } else { + comp_len = c_stream.total_out; + err = deflateEnd(&c_stream); + } + + switch (err) { + case Z_OK: + break; + case Z_BUF_ERROR: + /* data after compress is larger than uncompressed data*/ + break; + default: + ib_logf(IB_LOG_LEVEL_ERROR, + "failed to compress the column, error: %d\n", err); + } + + /* make sure the compressed data size is smaller than + uncompressed data */ + if (err == Z_OK && + *len > (comp_len + zip_column_header_length + lenlen)) { + column_set_compress_header(buf, true, lenlen - 1, + default_zip_column_algorithm_value, wrap, + default_zip_column_reserved_value); + ptr = buf + zip_column_header_length; + /*store the uncompressed data length*/ + switch (lenlen) { + case 1: + mach_write_to_1(ptr, *len); + break; + case 2: + mach_write_to_2(ptr, *len); + break; + case 3: + mach_write_to_3(ptr, *len); + break; + case 4: + mach_write_to_4(ptr, *len); + break; + default: + ut_error; + } + + *len 
= comp_len + zip_column_header_length + lenlen; + return buf; + } + +do_not_compress: + ptr = buf; + column_set_compress_header(ptr, false, 0, + default_zip_column_algorithm_value, false, + default_zip_column_reserved_value); + ptr += zip_column_header_length; + memcpy(ptr, data, *len); + *len += zip_column_header_length; + return buf; +} + +/** Uncompress blob/text/varchar column using zlib +@return pointer to the uncompressed data */ +const byte* +row_decompress_column( + const byte* data, /*!< in: data in innodb(compressed) format */ + ulint *len, /*!< in: data length; out: length of + decompressed data*/ + const byte* dict_data, + /*!< in: optional dictionary data used for + decompression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap only + here*/ +{ + ulint buf_len = 0; + byte* buf; + int err = 0; + int window_bits = 0; + z_stream d_stream; + bool is_compressed = false; + bool wrap = false; + bool reserved = false; + ulint lenlen = 0; + uint alg = 0; + + ut_ad(*len != ULINT_UNDEFINED); + ut_ad(*len >= zip_column_header_length); + + column_get_compress_header(data, &is_compressed, &lenlen, &alg, + &wrap, &reserved); + + if (reserved != default_zip_column_reserved_value) { + ib_logf(IB_LOG_LEVEL_FATAL, + "unsupported compressed BLOB header format\n"); + } + + if (alg != default_zip_column_algorithm_value) { + ib_logf(IB_LOG_LEVEL_FATAL, + "unsupported 'algorithm' value in the" + " compressed BLOB header\n"); + } + + ut_a(lenlen < 4); + + data += zip_column_header_length; + if (!is_compressed) { /* column not compressed */ + *len -= zip_column_header_length; + return data; + } + + lenlen++; + + ulint comp_len = *len - zip_column_header_length - lenlen; + + ulint uncomp_len = 0; + switch (lenlen) { + case 1: + uncomp_len = mach_read_from_1(data); + break; + case 2: + uncomp_len = mach_read_from_2(data); + break; + case 3: + uncomp_len = mach_read_from_3(data); + break; + 
case 4: + uncomp_len = mach_read_from_4(data); + break; + default: + ut_error; + } + + data += lenlen; + + /* data is compressed, decompress it*/ + if (!prebuilt->compress_heap) { + prebuilt->compress_heap = + mem_heap_create(max(UNIV_PAGE_SIZE, uncomp_len)); + } + + buf_len = uncomp_len; + buf = static_cast(mem_heap_zalloc( + prebuilt->compress_heap, buf_len)); + + /* init d_stream */ + d_stream.next_in = const_cast(data); + d_stream.avail_in = comp_len; + d_stream.next_out = buf; + d_stream.avail_out = buf_len; + + column_zip_set_alloc(&d_stream, prebuilt->compress_heap); + + window_bits = wrap ? MAX_WBITS : -MAX_WBITS; + err = inflateInit2(&d_stream, window_bits); + ut_a(err == Z_OK); + + err = inflate(&d_stream, Z_FINISH); + if (err == Z_NEED_DICT) { + ut_a(dict_data != 0 && dict_data_len != 0); + err = inflateSetDictionary(&d_stream, dict_data, + dict_data_len); + ut_a(err == Z_OK); + err = inflate(&d_stream, Z_FINISH); + } + + if (err != Z_STREAM_END) { + inflateEnd(&d_stream); + if (err == Z_BUF_ERROR && d_stream.avail_in == 0) + err = Z_DATA_ERROR; + } else { + buf_len = d_stream.total_out; + err = inflateEnd(&d_stream); + } + + switch (err) { + case Z_OK: + break; + case Z_BUF_ERROR: + ib_logf(IB_LOG_LEVEL_FATAL, + "zlib buf error, this shouldn't happen\n"); + break; + default: + ib_logf(IB_LOG_LEVEL_FATAL, + "failed to decompress column, error: %d\n", err); + } + + if (err == Z_OK) { + if (buf_len != uncomp_len) { + ib_logf(IB_LOG_LEVEL_FATAL, + "failed to decompress blob column, may" + " be corrupted\n"); + } + *len = buf_len; + return buf; + } + + *len -= (zip_column_header_length + lenlen); + return data; +} + + /*******************************************************************//** Stores a reference to a BLOB in the MySQL format. 
*/ UNIV_INTERN @@ -242,10 +715,21 @@ row_mysql_store_blob_ref( to 4 bytes */ const void* data, /*!< in: BLOB data; if the value to store is SQL NULL this should be NULL pointer */ - ulint len) /*!< in: BLOB length; if the value to store + ulint len, /*!< in: BLOB length; if the value to store is SQL NULL this should be 0; remember also to set the NULL bit in the MySQL record header! */ + bool need_decompression, + /*!< in: if the data need to be compressed*/ + const byte* dict_data, + /*!< in: optional compression dictionary + data */ + ulint dict_data_len, + /*!< in: optional compression dictionary data + length */ + row_prebuilt_t* prebuilt) + /*compress_heap only + here */ { /* MySQL might assume the field is set to zero except the length and the pointer fields */ @@ -257,13 +741,28 @@ row_mysql_store_blob_ref( In 32-bit architectures we only use the first 4 bytes of the pointer slot. */ - ut_a(col_len - 8 > 1 || len < 256); - ut_a(col_len - 8 > 2 || len < 256 * 256); - ut_a(col_len - 8 > 3 || len < 256 * 256 * 256); + ut_a(col_len - 8 > 1 || + len < 256 + + (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0)); + ut_a(col_len - 8 > 2 || + len < 256 * 256 + + (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0)); + ut_a(col_len - 8 > 3 || + len < 256 * 256 * 256 + + (need_decompression ? 
ZIP_COLUMN_HEADER_LENGTH : 0)); + + const byte *ptr = NULL; + + if (need_decompression) + ptr = row_decompress_column((const byte*)data, &len, + dict_data, dict_data_len, prebuilt); + + if (ptr) + memcpy(dest + col_len - 8, &ptr, sizeof ptr); + else + memcpy(dest + col_len - 8, &data, sizeof data); mach_write_to_n_little_endian(dest, col_len - 8, len); - - memcpy(dest + col_len - 8, &data, sizeof data); } /*******************************************************************//** @@ -276,15 +775,32 @@ row_mysql_read_blob_ref( ulint* len, /*!< out: BLOB length */ const byte* ref, /*!< in: BLOB reference in the MySQL format */ - ulint col_len) /*!< in: BLOB reference length + ulint col_len, /*!< in: BLOB reference length (not BLOB length) */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap + only here */ { - byte* data; + byte* data = NULL; + byte* ptr = NULL; *len = mach_read_from_n_little_endian(ref, col_len - 8); memcpy(&data, ref + col_len - 8, sizeof data); + if (need_compression) { + ptr = row_compress_column(data, len, col_len - 8, dict_data, + dict_data_len, prebuilt); + if (ptr) + data = ptr; + } + return(data); } @@ -367,7 +883,16 @@ row_mysql_store_col_in_innobase_format( necessarily the length of the actual payload data; if the column is a true VARCHAR then this is irrelevant */ - ulint comp) /*!< in: nonzero=compact format */ + ulint comp, /*!< in: nonzero=compact format */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap + only here */ { const byte* ptr = mysql_data; 
const dtype_t* dtype; @@ -420,8 +945,14 @@ row_mysql_store_col_in_innobase_format( lenlen = 2; } - ptr = row_mysql_read_true_varchar(&col_len, mysql_data, - lenlen); + const byte* tmp_ptr = row_mysql_read_true_varchar( + &col_len, mysql_data, lenlen); + if (need_compression) + ptr = row_compress_column(tmp_ptr, &col_len, + lenlen, dict_data, dict_data_len, + prebuilt); + else + ptr = tmp_ptr; } else { /* Remove trailing spaces from old style VARCHAR columns. */ @@ -503,7 +1034,9 @@ row_mysql_store_col_in_innobase_format( } } else if (type == DATA_BLOB && row_format_col) { - ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); + ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len, + need_compression, dict_data, dict_data_len, + prebuilt); } dfield_set_data(dfield, ptr, col_len); @@ -561,7 +1094,11 @@ row_mysql_convert_row_to_innobase( TRUE, /* MySQL row format data */ mysql_rec + templ->mysql_col_offset, templ->mysql_col_len, - dict_table_is_comp(prebuilt->table)); + dict_table_is_comp(prebuilt->table), + templ->compressed, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, prebuilt); next_column: ; } @@ -907,6 +1444,10 @@ row_prebuilt_free( mem_heap_free(prebuilt->blob_heap); } + if (prebuilt->compress_heap) { + mem_heap_free(prebuilt->compress_heap); + } + if (prebuilt->old_vers_heap) { mem_heap_free(prebuilt->old_vers_heap); } @@ -1333,6 +1874,9 @@ row_insert_for_mysql( return(DB_READ_ONLY); } + if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) + mem_heap_empty(prebuilt->compress_heap); + trx->op_info = "inserting"; row_mysql_delay_if_needed(); @@ -2693,6 +3237,10 @@ loop: return(n_tables + n_tables_dropped); } + DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", + os_thread_sleep(5000000); + ); + table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -2703,6 +3251,16 @@ loop: goto already_dropped; } + if (!table->to_be_dropped) { + /* There is a scenario: the old table is dropped + 
just after it's added into drop list, and new + table with the same name is created, then we try + to drop the new table in background. */ + dict_table_close(table, FALSE, FALSE); + + goto already_dropped; + } + ut_a(!table->can_be_evicted); dict_table_close(table, FALSE, FALSE); @@ -2833,6 +3391,12 @@ row_mysql_table_id_reassign( pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", *new_id); + /* As micro-SQL does not support int4 == int8 comparisons, + old and new IDs are added again under different names as + int4 values*/ + pars_info_add_int4_literal(info, "old_id_narrow", table->id); + pars_info_add_int4_literal(info, "new_id_narrow", *new_id); + err = que_eval_sql( info, "PROCEDURE RENUMBER_TABLE_PROC () IS\n" @@ -2843,6 +3407,8 @@ row_mysql_table_id_reassign( " WHERE TABLE_ID = :old_id;\n" "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n" " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_ZIP_DICT_COLS SET TABLE_ID = :new_id_narrow\n" + " WHERE TABLE_ID = :old_id_narrow;\n" "END;\n", FALSE, trx); return(err); @@ -3609,6 +4175,12 @@ next_rec: pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", new_id); + /* As micro-SQL does not support int4 == int8 comparisons, + old and new IDs are added again under different names as + int4 values*/ + pars_info_add_int4_literal(info, "old_id_narrow", table->id); + pars_info_add_int4_literal(info, "new_id_narrow", new_id); + err = que_eval_sql(info, "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" "BEGIN\n" @@ -3620,6 +4192,9 @@ next_rec: "UPDATE SYS_INDEXES" " SET TABLE_ID = :new_id, SPACE = :new_space\n" " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_ZIP_DICT_COLS\n" + " SET TABLE_ID = :new_id_narrow\n" + " WHERE TABLE_ID = :old_id_narrow;\n" "END;\n" , FALSE, trx); @@ -3962,6 +4537,13 @@ row_drop_table_for_mysql( } } + + DBUG_EXECUTE_IF("row_drop_table_add_to_background", + row_add_table_to_background_drop_list(table->name); + err = DB_SUCCESS; + goto 
funct_exit; + ); + /* TODO: could we replace the counter n_foreign_key_checks_running with lock checks on the table? Acquire here an exclusive lock on the table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that @@ -4232,6 +4814,19 @@ row_drop_table_for_mysql( filepath = fil_make_ibd_name(tablename, false); } + /* Remove all compression dictionary references for the + table */ + err = dict_create_remove_zip_dict_references_for_table( + table->id, trx); + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, "Error: (%s) not " + "able to remove compression dictionary " + "references for table %s", ut_strerr(err), + tablename); + + goto funct_exit; + } + if (dict_table_has_fts_index(table) || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { ut_ad(table->n_ref_count == 0); @@ -4578,6 +5173,19 @@ loop: row_mysql_lock_data_dictionary(trx); while ((table_name = dict_get_first_table_name_in_db(name))) { + /* Drop parent table if it is a fts aux table, to + avoid accessing dropped fts aux tables in information + scheam when parent table still exists. + Note: Drop parent table will drop fts aux tables. 
*/ + char* parent_table_name; + parent_table_name = fts_get_parent_table_name( + table_name, strlen(table_name)); + + if (parent_table_name != NULL) { + mem_free(table_name); + table_name = parent_table_name; + } + ut_a(memcmp(table_name, name, namelen) == 0); table = dict_table_open_on_name( diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index 74579687a9b..d2821abdc2e 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -2460,9 +2460,11 @@ row_sel_convert_mysql_key_to_innobase( if (UNIV_LIKELY(!is_null)) { buf = row_mysql_store_col_in_innobase_format( dfield, buf, - FALSE, /* MySQL key value format col */ + /* MySQL key value format col */ + FALSE, key_ptr + data_offset, data_len, - dict_table_is_comp(index->table)); + dict_table_is_comp(index->table), + false, 0, 0 ,0); ut_a(buf <= original_buf + buf_len); } @@ -2555,12 +2557,16 @@ row_sel_store_row_id_to_prebuilt( #ifdef UNIV_DEBUG /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ -# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len) +# define row_sel_field_store_in_mysql_format( \ + dest,templ,idx,field,src,len,prebuilt) \ + row_sel_field_store_in_mysql_format_func \ + (dest,templ,idx,field,src,len, prebuilt) #else /* UNIV_DEBUG */ /** Convert a non-SQL-NULL field from Innobase format to MySQL format. 
*/ -# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func(dest,templ,src,len) +# define row_sel_field_store_in_mysql_format( \ + dest,templ,idx,field,src,len,prebuilt) \ + row_sel_field_store_in_mysql_format_func \ + (dest,templ,src,len, prebuilt) #endif /* UNIV_DEBUG */ /**************************************************************//** @@ -2590,7 +2596,10 @@ row_sel_field_store_in_mysql_format_func( templ->icp_rec_field_no */ #endif /* UNIV_DEBUG */ const byte* data, /*!< in: data to store */ - ulint len) /*!< in: length of the data */ + ulint len, /*!< in: length of the data */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap + only here */ { byte* ptr; #ifdef UNIV_DEBUG @@ -2634,6 +2643,15 @@ row_sel_field_store_in_mysql_format_func( field_end = dest + templ->mysql_col_len; if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + /* If this is a compressed column, + decompress it first */ + if (templ->compressed) + data = row_decompress_column(data, &len, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, + prebuilt); + /* This is a >= 5.0.3 type true VARCHAR. Store the length of the data to the first byte or the first two bytes of dest. 
*/ @@ -2684,7 +2702,11 @@ row_sel_field_store_in_mysql_format_func( already copied to the buffer in row_sel_store_mysql_rec */ row_mysql_store_blob_ref(dest, templ->mysql_col_len, data, - len); + len, templ->compressed, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, + prebuilt); break; case DATA_MYSQL: @@ -2837,7 +2859,7 @@ row_sel_store_mysql_field_func( row_sel_field_store_in_mysql_format( mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); + templ, index, field_no, data, len, prebuilt); if (heap != prebuilt->blob_heap) { mem_heap_free(heap); @@ -2887,7 +2909,7 @@ row_sel_store_mysql_field_func( row_sel_field_store_in_mysql_format( mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); + templ, index, field_no, data, len, prebuilt); } ut_ad(len != UNIV_SQL_NULL); @@ -2935,6 +2957,9 @@ row_sel_store_mysql_rec( prebuilt->blob_heap = NULL; } + if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) + mem_heap_empty(prebuilt->compress_heap); + for (i = 0; i < prebuilt->n_template; i++) { const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; const ulint field_no diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 930694ac0af..8f1d341ad1b 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2714,6 +2714,12 @@ files_checked: return(err); } + /* Create the SYS_ZIP_DICT system table */ + err = dict_create_or_check_sys_zip_dict(); + if (err != DB_SUCCESS) { + return(err); + } + srv_is_being_started = FALSE; ut_a(trx_purge_state() == PURGE_STATE_INIT); From d9787aa29af3e77c5cd04defe0331c721542cff6 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 25 Oct 2016 17:03:23 +0200 Subject: [PATCH 83/96] 5.6.33-79.0 --- storage/tokudb/CMakeLists.txt | 2 +- .../tokudb/PerconaFT/buildheader/make_tdb.cc | 3 + .../ft/cachetable/cachetable-internal.h | 2 + .../PerconaFT/ft/cachetable/cachetable.cc | 16 ++ 
.../PerconaFT/ft/cachetable/cachetable.h | 6 + storage/tokudb/PerconaFT/ft/ft-ops.cc | 151 ++++++++++++++- storage/tokudb/PerconaFT/ft/ft-ops.h | 2 + storage/tokudb/PerconaFT/ft/ft.cc | 14 +- storage/tokudb/PerconaFT/ft/ft.h | 6 + .../tokudb/PerconaFT/ft/logger/logformat.cc | 9 + storage/tokudb/PerconaFT/ft/logger/recover.cc | 78 ++++++++ .../PerconaFT/ft/serialize/rbtree_mhs.h | 16 +- .../test-rbtree-insert-remove-without-mhs.cc | 7 +- storage/tokudb/PerconaFT/ft/txn/roll.cc | 118 +++++++++++- storage/tokudb/PerconaFT/portability/file.cc | 6 + .../tokudb/PerconaFT/portability/memory.cc | 9 + storage/tokudb/PerconaFT/portability/memory.h | 4 +- .../PerconaFT/portability/toku_portability.h | 2 + .../tokudb/PerconaFT/src/tests/CMakeLists.txt | 42 ++-- .../src/tests/recovery_fileops_unit.cc | 159 ++++++++-------- storage/tokudb/PerconaFT/src/ydb-internal.h | 3 +- storage/tokudb/PerconaFT/src/ydb.cc | 50 ++++- storage/tokudb/PerconaFT/src/ydb_db.cc | 99 +++++++--- storage/tokudb/PerconaFT/src/ydb_db.h | 16 ++ storage/tokudb/hatoku_hton.cc | 1 + .../r/dir-per-db-with-custom-data-dir.result | 10 + .../mysql-test/tokudb/r/dir_per_db.result | 180 ++++++++++++++++++ .../r/i_s_tokudb_lock_waits_released.result | 12 ++ .../mysql-test/tokudb/r/row_format.result | 51 +++++ ...dir-per-db-with-custom-data-dir-master.opt | 1 + .../t/dir-per-db-with-custom-data-dir.test | 16 ++ .../mysql-test/tokudb/t/dir_per_db.test | 76 ++++++++ .../tokudb/t/dir_per_db_show_table_files.inc | 9 + .../t/i_s_tokudb_lock_waits_released.test | 29 ++- .../mysql-test/tokudb/t/row_format.test | 41 ++++ .../mysql-test/tokudb_bugs/r/db938.result | 1 + .../mysql-test/tokudb_bugs/t/db938.test | 3 + .../t/partition_debug_sync_tokudb.test | 4 +- storage/tokudb/tokudb_sysvars.cc | 14 ++ storage/tokudb/tokudb_sysvars.h | 1 + 40 files changed, 1107 insertions(+), 162 deletions(-) create mode 100644 storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result create mode 100644 
storage/tokudb/mysql-test/tokudb/r/dir_per_db.result create mode 100644 storage/tokudb/mysql-test/tokudb/r/row_format.result create mode 100644 storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt create mode 100644 storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test create mode 100644 storage/tokudb/mysql-test/tokudb/t/dir_per_db.test create mode 100644 storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc create mode 100644 storage/tokudb/mysql-test/tokudb/t/row_format.test diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index fbb02582f4d..ad30e6d40eb 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(TOKUDB_VERSION 5.6.32-78.1) +SET(TOKUDB_VERSION 5.6.33-79.0) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT CMAKE_VERSION VERSION_LESS "2.8.9") diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 576f902f6ae..7ede78b3c0d 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -422,6 +422,9 @@ static void print_db_env_struct (void) { "int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)", "void (*set_check_thp)(DB_ENV *, bool new_val)", "bool (*get_check_thp)(DB_ENV *)", + "bool (*set_dir_per_db)(DB_ENV *, bool new_val)", + "bool (*get_dir_per_db)(DB_ENV *)", + "const char *(*get_data_dir)(DB_ENV *env)", NULL}; sort_and_dump_fields("db_env", true, extra); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h index dc6aec9226d..05fb771de08 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h @@ -138,6 +138,8 @@ struct cachefile { // nor attempt to open any cachefile with the same 
fname (dname) // until this cachefile has been fully closed and unlinked. bool unlink_on_close; + // If set then fclose will not be logged in recovery log. + bool skip_log_recover_on_close; int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */ CACHETABLE cachetable; struct fileid fileid; diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc index 5bba977de1a..6d753805fa9 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc @@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) { return cf->fname_in_env; } +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) { + cf->fname_in_env = new_fname_in_env; +} + int toku_cachefile_get_fd (CACHEFILE cf) { return cf->fd; @@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) { return cf->unlink_on_close; } +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = true; +} + +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = false; +} + +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) { + return cf->skip_log_recover_on_close; +} + uint64_t toku_cachefile_size(CACHEFILE cf) { int64_t file_size; int fd = toku_cachefile_get_fd(cf); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h index 148326562ab..3b3cb0a2d46 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h @@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE); // Return the filename char * toku_cachefile_fname_in_env (CACHEFILE cf); +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env); + // Make it so when the cachefile 
closes, the underlying file is unlinked void toku_cachefile_unlink_on_close(CACHEFILE cf); // is this cachefile marked as unlink on close? bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf); +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf); +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf); + // Return the logger associated with the cachefile struct tokulogger *toku_cachefile_logger(CACHEFILE cf); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index f131668889e..30a8710d7aa 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -149,22 +149,23 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/cachetable/checkpoint.h" #include "ft/cursor.h" -#include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" -#include "ft/msg.h" +#include "ft/ft.h" #include "ft/leafentry.h" #include "ft/logger/log-internal.h" +#include "ft/msg.h" #include "ft/node.h" #include "ft/serialize/block_table.h" -#include "ft/serialize/sub_block.h" #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_layout_version.h" #include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" #include "ft/txn/txn_manager.h" -#include "ft/ule.h" #include "ft/txn/xids.h" +#include "ft/ule.h" +#include "src/ydb-internal.h" #include @@ -179,6 +180,7 @@ basement nodes, bulk fetch, and partial fetch: #include +#include /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. 
*/ @@ -2593,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; +inline bool toku_file_is_root(const char *path, const char *last_slash) { + return last_slash == path; +} + +static std::unique_ptr toku_file_get_parent_dir( + const char *path) { + std::unique_ptr result(nullptr, &toku_free); + + bool has_trailing_slash = false; + + /* Find the offset of the last slash */ + const char *last_slash = strrchr(path, OS_PATH_SEPARATOR); + + if (!last_slash) { + /* No slash in the path, return NULL */ + return result; + } + + /* Ok, there is a slash. Is there anything after it? */ + if (static_cast(last_slash - path + 1) == strlen(path)) { + has_trailing_slash = true; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Check for the root of a drive. */ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + /* If a trailing slash prevented the first strrchr() from trimming + the last component of the path, trim that component now. */ + if (has_trailing_slash) { + /* Back up to the previous slash. */ + last_slash--; + while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + } + + /* Check for the root of a drive. 
*/ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + result.reset(toku_strndup(path, last_slash - path)); + return result; +} + +static bool toku_create_subdirs_if_needed(const char *path) { + static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH; + + toku_struct_stat stat; + bool subdir_exists = true; + auto subdir = toku_file_get_parent_dir(path); + + if (!subdir.get()) + return true; + + if (toku_stat(subdir.get(), &stat) == -1) { + if (ENOENT == get_error_errno()) + subdir_exists = false; + else + return false; + } + + if (subdir_exists) { + if (!S_ISDIR(stat.st_mode)) + return false; + return true; + } + + if (!toku_create_subdirs_if_needed(subdir.get())) + return false; + + if (toku_os_mkdir(subdir.get(), dir_mode)) + return false; + + return true; +} + // open a file for use by the ft // Requires: File does not exist. static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { int r; int fd; int er; + if (!toku_create_subdirs_if_needed(fname)) + return get_error_errno(); fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); assert(fd==-1); if ((er = get_maybe_error_errno()) != ENOENT) { @@ -4427,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) { toku_cachefile_unlink_on_close(cf); } +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct) { + int r = 0; + + std::unique_ptr new_iname_full(nullptr, + &toku_free); + std::unique_ptr old_iname_full(nullptr, + &toku_free); + + new_iname_full.reset(toku_construct_full_name(2, data_dir, new_iname)); + old_iname_full.reset(toku_construct_full_name(2, data_dir, old_iname)); + + if (txn) { + BYTESTRING bs_old_name = {static_cast(strlen(old_iname) + 1), + const_cast(old_iname)}; + BYTESTRING bs_new_name = {static_cast(strlen(new_iname) + 1), + const_cast(new_iname)}; + FILENUM filenum = FILENUM_NONE; + { + CACHEFILE cf; + r = 
toku_cachefile_of_iname_in_env(ct, old_iname, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname)); + toku_free(old_fname_in_cf); + filenum = toku_cachefile_filenum(cf); + } + } + toku_logger_save_rollback_frename( + db_txn_struct_i(txn)->tokutxn, &bs_old_name, &bs_new_name); + toku_log_frename(db_txn_struct_i(txn)->tokutxn->logger, + (LSN *)0, + 0, + toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn), + bs_old_name, + filenum, + bs_new_name); + } + + r = toku_os_rename(old_iname_full.get(), new_iname_full.get()); + if (r != 0) + return r; + r = toku_fsync_directory(new_iname_full.get()); + return r; +} + int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { int fd = toku_cachefile_get_fd(ft_handle->ft->cf); toku_ft_lock(ft_handle->ft); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h index 313a74628ea..70cf045d43c 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.h +++ b/storage/tokudb/PerconaFT/ft/ft-ops.h @@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "ft/msg.h" #include "util/dbt.h" +#define OS_PATH_SEPARATOR '/' + typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 699fcc57603..7c94b4c59d3 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val char* fname_in_env = toku_cachefile_fname_in_env(cachefile); assert(fname_in_env); BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env}; - toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out. + if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) { + toku_log_fclose( + logger, + &lsn, + ft->h->dirty, + bs, + toku_cachefile_filenum(cachefile)); // flush the log on + // close (if new header + // is being written), + // otherwise it might + // not make it out. 
+ toku_cachefile_do_log_recover_on_close(cachefile); + } } } if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h index d600e093bdc..7a3c4fa783c 100644 --- a/storage/tokudb/PerconaFT/ft/ft.h +++ b/storage/tokudb/PerconaFT/ft/ft.h @@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS; void toku_ft_unlink(FT_HANDLE handle); void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn); +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct); + void toku_ft_init_reflock(FT ft); void toku_ft_destroy_reflock(FT ft); void toku_ft_grab_reflock(FT ft); diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc index 6f3baa81c86..49b61138803 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc @@ -90,6 +90,10 @@ const struct logtype rollbacks[] = { {"fcreate", 'F', FA{{"FILENUM", "filenum", 0}, {"BYTESTRING", "iname", 0}, NULLFIELD}, LOG_BEGIN_ACTION_NA}, + //rename file + {"frename", 'n', FA{{"BYTESTRING", "old_iname", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, // cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data. 
{"cmdinsert", 'i', FA{ {"FILENUM", "filenum", 0}, @@ -195,6 +199,11 @@ const struct logtype logtypes[] = { {"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0}, {"FILENUM", "filenum", 0}, NULLFIELD}, SHOULD_LOG_BEGIN}, + {"frename", 'n', FA{{"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "old_iname", 0}, + {"FILENUM", "old_filenum", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, {"enq_insert", 'I', FA{{"FILENUM", "filenum", 0}, {"TXNID_PAIR", "xid", 0}, {"BYTESTRING", "key", 0}, diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc index 38f29773bd6..a9c30c0e37a 100644 --- a/storage/tokudb/PerconaFT/ft/logger/recover.cc +++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc @@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." +#include #include "ft/cachetable/cachetable.h" #include "ft/cachetable/checkpoint.h" #include "ft/ft.h" @@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER return 0; } +static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { + assert(renv); + assert(renv->env); + + toku_struct_stat stat; + const char *data_dir = renv->env->get_data_dir(renv->env); + bool old_exist = true; + bool new_exist = true; + + assert(data_dir); + + struct file_map_tuple *tuple; + + std::unique_ptr old_iname_full( + toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free); + std::unique_ptr new_iname_full( + toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + 
// - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. As it is supposed + // new file have not yet created during recovery process the 'stalled + // cachefile' container can contain only cache file of old file. + // To preserve the old cachefile file's id and keep it in + // 'stalled cachefiles' container the new file is removed + // and the old file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(new_iname_full.get()) == -1 || + toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (old_exist && !new_exist && + (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) { + if (tuple->iname) + toku_free(tuple->iname); + tuple->iname = toku_xstrdup(l->new_iname.data); + } + + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + + if (txn) + toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname); + + return 0; +} + +static int toku_recover_backward_frename(struct logtype_frename *UU(l), + RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) { int r; TOKUTXN txn = NULL; diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h index 92f1e278e1a..eb8c953b08c 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h @@ -106,6 +106,7 @@ namespace MhsRbTree { static const uint64_t MHS_MAX_VAL 
= 0xffffffffffffffff; OUUInt64() : _value(0) {} OUUInt64(uint64_t s) : _value(s) {} + OUUInt64(const OUUInt64& o) : _value(o._value) {} bool operator<(const OUUInt64 &r) const { invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); return _value < r.ToInt(); @@ -182,15 +183,18 @@ namespace MhsRbTree { class Node { public: - struct BlockPair { + class BlockPair { + public: OUUInt64 _offset; OUUInt64 _size; BlockPair() : _offset(0), _size(0) {} BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} - BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} - int operator<(const struct BlockPair &rhs) const { + BlockPair(const BlockPair &o) + : _offset(o._offset), _size(o._size) {} + + int operator<(const BlockPair &rhs) const { return _offset < rhs._offset; } int operator<(const uint64_t &o) const { return _offset < o; } @@ -203,15 +207,15 @@ namespace MhsRbTree { }; EColor _color; - struct BlockPair _hole; - struct Pair _label; + BlockPair _hole; + Pair _label; Node *_left; Node *_right; Node *_parent; Node(EColor c, Node::BlockPair h, - struct Pair lb, + Pair lb, Node *l, Node *r, Node *p) diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc index 85f29ce9813..cefe66335a6 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc @@ -53,9 +53,10 @@ static void generate_random_input() { std::srand(unsigned(std::time(0))); // set some values: - for (uint64_t i = 1; i < N; ++i) { - input_vector.push_back({i, 0}); - old_vector[i] = {i, 0}; + for (uint64_t i = 0; i < N; ++i) { + MhsRbTree::Node::BlockPair bp = {i+1, 0}; + input_vector.push_back(bp); + old_vector[i] = bp; } // using built-in random generator: std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom); diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc 
b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 90eee1e580a..9f3977743a0 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -38,13 +38,13 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. /* rollback and rollforward routines. */ - -#include "ft/ft.h" +#include #include "ft/ft-ops.h" +#include "ft/ft.h" #include "ft/log_header.h" #include "ft/logger/log-internal.h" -#include "ft/txn/xids.h" #include "ft/txn/rollback-apply.h" +#include "ft/txn/xids.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h @@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM filenum, // directory row lock for its dname) and we would not get this // far if there were other live handles. toku_cachefile_unlink_on_close(cf); + toku_cachefile_skip_log_recover_on_close(cf); done: return 0; } +int toku_commit_frename(BYTESTRING /* old_name */, + BYTESTRING /* new_iname */, + TOKUTXN /* txn */, + LSN UU(oplsn)) { + return 0; +} + +int toku_rollback_frename(BYTESTRING old_iname, + BYTESTRING new_iname, + TOKUTXN txn, + LSN UU(oplsn)) { + assert(txn); + assert(txn->logger); + assert(txn->logger->ct); + + CACHETABLE cachetable = txn->logger->ct; + + toku_struct_stat stat; + bool old_exist = true; + bool new_exist = true; + + std::unique_ptr old_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data), + &toku_free); + std::unique_ptr new_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data), + &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + // - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid 
contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. To preserve the + // new cachefile + // file's id and keep it in 'stalled cachefiles' container the old file is + // removed + // and the new file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(old_iname_full.get()) == -1 || + toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + if (!old_exist && new_exist && + (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + // it's ok if both files do not exist on recovery + if (!old_exist && !new_exist) + assert(txn->for_recovery); + + CACHEFILE cf; + int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data)); + toku_free(old_fname_in_cf); + // There is at least one case when fclose logging cause error: + // 1) start transaction + // 2) create ft 'a'(write "fcreate" in recovery log) + // 3) rename ft 'a' to 'b'(write "frename" in recovery log) + // 4) abort transaction: + // a) rollback rename ft (renames 'b' to 'a') + // b) rollback create ft (removes 'a'): + // invokes toku_cachefile_unlink_on_close - lazy unlink on file + // close, + // it just sets corresponding flag in cachefile object + // c) write "unlink" for 'a' in recovery log + // (when transaction is aborted all locks are released, + // when file lock is released the file is closed and unlinked if + // corresponding flag is set in cachefile object) + // 5) crash + // + // After this we have the following records in recovery log: + // - create ft 'a', + // - rename 'a' to 'b', 
+ // - unlink 'a' + // + // On recovery: + // - create 'a' + // - rename 'a' to 'b' + // - unlink 'a' - as 'a' file does not exist we have crash on assert + // here + // + // There is no need to write "unlink" in recovery log in (4a) because + // 'a' will be removed + // on transaction rollback on recovery. + toku_cachefile_skip_log_recover_on_close(cf); + } + + return 0; +} + int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { FILENUM thisfnum = toku_cachefile_filenum(ft->cf); diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc index 5332a2dff55..0e3efc1a12a 100644 --- a/storage/tokudb/PerconaFT/portability/file.cc +++ b/storage/tokudb/PerconaFT/portability/file.cc @@ -356,6 +356,12 @@ toku_os_close(int fd) { // if EINTR, retry until success return r; } +int toku_os_rename(const char *old_name, const char *new_name) { + return rename(old_name, new_name); +} + +int toku_os_unlink(const char *path) { return unlink(path); } + ssize_t toku_os_read(int fd, void *buf, size_t count) { ssize_t r; diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc index 2de12699c61..5430ff84b70 100644 --- a/storage/tokudb/PerconaFT/portability/memory.cc +++ b/storage/tokudb/PerconaFT/portability/memory.cc @@ -313,6 +313,15 @@ toku_strdup(const char *s) { return (char *) toku_memdup(s, strlen(s)+1); } +char *toku_strndup(const char *s, size_t n) { + size_t s_size = strlen(s); + size_t bytes_to_copy = n > s_size ? 
s_size : n; + ++bytes_to_copy; + char *result = (char *)toku_memdup(s, bytes_to_copy); + result[bytes_to_copy - 1] = 0; + return result; +} + void toku_free(void *p) { if (p) { diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h index 7780536f279..5ae652d39fc 100644 --- a/storage/tokudb/PerconaFT/portability/memory.h +++ b/storage/tokudb/PerconaFT/portability/memory.h @@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default") void *toku_memdup (const void *v, size_t len); /* Toku-version of strdup. Use this so that it calls toku_malloc() */ char *toku_strdup (const char *s) __attribute__((__visibility__("default"))); - +/* Toku-version of strndup. Use this so that it calls toku_malloc() */ +char *toku_strndup(const char *s, size_t n) + __attribute__((__visibility__("default"))); /* Copy memory. Analogous to strdup() Crashes instead of returning NULL */ void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default"))); /* Toku-version of strdup. 
Use this so that it calls toku_xmalloc() Crashes instead of returning NULL */ diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h index 921d3a309f6..f127b0fe172 100644 --- a/storage/tokudb/PerconaFT/portability/toku_portability.h +++ b/storage/tokudb/PerconaFT/portability/toku_portability.h @@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode); int toku_os_open_direct(const char *path, int oflag, int mode); int toku_os_close(int fd); int toku_os_fclose(FILE * stream); +int toku_os_rename(const char *old_name, const char *new_name); +int toku_os_unlink(const char *path); ssize_t toku_os_read(int fd, void *buf, size_t count); ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset); void toku_os_recursive_delete(const char *path); diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt index 47f6aa44a75..c01a8f0d628 100644 --- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt @@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(ov c d r) if (ov STREQUAL c) - set(gset 0) set(hset 0) + set(iset 0) else () - set(gset 0 1 2 3 4 5) - set(hset 0 1) + set(hset 0 1 2 3 4 5) + set(iset 0 1) endif () foreach(av 0 1) @@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(dv ${dset}) foreach(ev ${eset}) foreach(fv 0 1) - foreach(gv ${gset}) + foreach(gv 0 1) foreach(hv ${hset}) - - if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) - set(iset 0 1) - else () - set(iset 0) - endif () - foreach(iv ${iset}) - set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors") - add_test(NAME 
${testname} - COMMAND run_recovery_fileops_unit.sh $ ${errfile} 137 - -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} - ) - setup_toku_test_properties(${testname} ${envdir}) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + + if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) + set(jset 0 1) + else () + set(jset 0) + endif () + + foreach(jv ${jset}) + set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors") + add_test(NAME ${testname} + COMMAND run_recovery_fileops_unit.sh $ ${errfile} 137 + -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv} + ) + setup_toku_test_properties(${testname} ${envdir}) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + endforeach(jv) endforeach(iv) endforeach(hv) endforeach(gv) diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc index 2c905c5ff12..cc99ab560d8 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc @@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
+#include +#include +#include +#include "ft/logger/logger.h" #include "test.h" #include "toku_pthread.h" -#include -#include -#include - static int do_recover; static int do_crash; static char fileop; -static int choices['I'-'A'+1]; +static int choices['J' - 'A' + 1]; const int num_choices = sizeof(choices)/sizeof(choices[0]); static DB_TXN *txn; const char *oldname = "oldfoo"; @@ -58,11 +58,14 @@ static char *cmd; static void usage(void) { - fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n" - " fileop = c/r/d (create/rename/delete)\n" - " Where # is a single digit number > 0.\n" - " A-F are required for fileop=create\n" - " A-I are required for fileop=delete, fileop=rename\n", cmd); + fprintf(stderr, + "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# " + "-F# -G# [-H# -I# -J#]\n" + " fileop = c/r/d (create/rename/delete)\n" + " Where # is a single digit number > 0.\n" + " A-G are required for fileop=create\n" + " A-I are required for fileop=delete, fileop=rename\n", + cmd); exit(1); } @@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) { return get_bool_choice('F'); } -static int -get_choice_create_type(void) { - return get_x_choice('G', 6); -} +static int get_choice_dir_per_db(void) { return get_bool_choice('G'); } + +static int get_choice_create_type(void) { return get_x_choice('H', 6); } static int get_choice_txn_does_open_close_before_fileop(void) { - return get_bool_choice('H'); + return get_bool_choice('I'); } static int get_choice_lock_table_split_fcreate(void) { - int choice = get_bool_choice('I'); + int choice = get_bool_choice('J'); if (choice) assert(fileop_did_commit()); return choice; @@ -156,63 +158,65 @@ do_args(int argc, char * const argv[]) { choices[i] = -1; } - int c; - while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) { - switch(c) { - case 'v': - verbose++; - break; - case 'q': - verbose--; - if (verbose<0) verbose=0; - break; - case 'h': - case 
'?': - usage(); - break; - case 'c': - do_crash = 1; - break; - case 'r': - do_recover = 1; - break; - case 'O': - if (fileop != '\0') - usage(); - fileop = optarg[0]; - switch (fileop) { - case 'c': - case 'r': - case 'd': - break; - default: - usage(); - break; - } - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - if (fileop == '\0') - usage(); - int num; - num = atoi(optarg); - if (num < 0 || num > 9) - usage(); - choices[c - 'A'] = num; - break; - case 'X': - if (strcmp(optarg, "novalgrind") == 0) { - // provide a way for the shell script runner to pass an - // arg that suppresses valgrind on this child process + char c; + while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) { + switch (c) { + case 'v': + verbose++; break; - } + case 'q': + verbose--; + if (verbose < 0) + verbose = 0; + break; + case 'h': + case '?': + usage(); + break; + case 'c': + do_crash = 1; + break; + case 'r': + do_recover = 1; + break; + case 'O': + if (fileop != '\0') + usage(); + fileop = optarg[0]; + switch (fileop) { + case 'c': + case 'r': + case 'd': + break; + default: + usage(); + break; + } + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + if (fileop == '\0') + usage(); + int num; + num = atoi(optarg); + if (num < 0 || num > 9) + usage(); + choices[c - 'A'] = num; + break; + case 'X': + if (strcmp(optarg, "novalgrind") == 0) { + // provide a way for the shell script runner to pass an + // arg that suppresses valgrind on this child process + break; + } // otherwise, fall through to an error default: usage(); @@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) { if (argc!=optind) { usage(); exit(1); } for (i = 0; i < num_choices; i++) { - if (i >= 'G' - 'A' && fileop == 'c') + if (i >= 'H' - 'A' && fileop == 'c') break; if (choices[i] == -1) usage(); @@ -261,6 +265,8 @@ static void env_startup(void) { int 
envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag; r = db_env_create(&env, 0); CKERR(r); + r = env->set_dir_per_db(env, get_choice_dir_per_db()); + CKERR(r); env->set_errfile(env, stderr); r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -625,8 +631,11 @@ recover_and_verify(void) { else if (did_create_commit_early()) expect_old_name = 1; } - verify_file_exists(oldname, expect_old_name); - verify_file_exists(newname, expect_new_name); + // We can't expect files existence until recovery log was not flushed + if ((get_choice_flush_log_before_crash())) { + verify_file_exists(oldname, expect_old_name); + verify_file_exists(newname, expect_new_name); + } env_shutdown(); } diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h index 2d6c84126e1..d40f7795b0b 100644 --- a/storage/tokudb/PerconaFT/src/ydb-internal.h +++ b/storage/tokudb/PerconaFT/src/ydb-internal.h @@ -132,7 +132,8 @@ struct __toku_db_env_internal { int datadir_lockfd; int logdir_lockfd; int tmpdir_lockfd; - bool check_thp; // if set check if transparent huge pages are disables + bool check_thp; // if set check if transparent huge pages are disabled + bool dir_per_db; uint64_t (*get_loader_memory_size_callback)(void); uint64_t default_lock_timeout_msec; uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec); diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc index aed271bce40..3341f6d76c6 100644 --- a/storage/tokudb/PerconaFT/src/ydb.cc +++ b/storage/tokudb/PerconaFT/src/ydb.cc @@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) { return env->i->check_thp; } +static bool env_set_dir_per_db(DB_ENV *env, bool new_val) { + HANDLE_PANICKED_ENV(env); + bool r = env->i->dir_per_db; + env->i->dir_per_db = new_val; + return r; +} + +static bool env_get_dir_per_db(DB_ENV *env) { + HANDLE_PANICKED_ENV(env); + return 
env->i->dir_per_db; +} + +static const char *env_get_data_dir(DB_ENV *env) { + return env->i->real_data_dir; +} + static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags); static int @@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { USENV(do_backtrace); USENV(set_check_thp); USENV(get_check_thp); + USENV(set_dir_per_db); + USENV(get_dir_per_db); + USENV(get_data_dir); #undef USENV // unlocked methods @@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co if (env_is_db_with_dname_open(env, newname)) { return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n"); } - + DBT old_dname_dbt; DBT new_dname_dbt; DBT iname_dbt; @@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co r = EEXIST; } else if (r == DB_NOTFOUND) { + DBT new_iname_dbt; + // Do not rename ft file if 'dir_per_db' option is not set + auto new_iname = + env->get_dir_per_db(env) + ? generate_iname_for_rename_or_open( + env, txn, newname, false) + : std::unique_ptr( + toku_strdup(iname), &toku_free); + toku_fill_dbt( + &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1); + // remove old (dname,iname) and insert (newname,iname) in directory r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true); if (r != 0) { goto exit; } - r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true); + + // Do not rename ft file if 'dir_per_db' option is not set + if (env->get_dir_per_db(env)) + r = toku_ft_rename_iname(txn, + env->get_data_dir(env), + iname, + new_iname.get(), + env->i->cachetable); + + r = toku_db_put(env->i->directory, + txn, + &new_dname_dbt, + &new_iname_dbt, + 0, + true); if (r != 0) { goto exit; } //Now that we have writelocks on both dnames, verify that there are still no handles open. 
(to prevent race conditions) @@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co // otherwise, we're okay in marking this ft as remove on // commit. no new handles can open for this dictionary // because the txn has directory write locks on the dname - if (txn && !can_acquire_table_lock(env, txn, iname)) { + if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) { r = DB_LOCK_NOTGRANTED; } // We don't do anything at the ft or cachetable layer for rename. diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc index e5bd4e7d089..100d1bfa20b 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.cc +++ b/storage/tokudb/PerconaFT/src/ydb_db.cc @@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) { *statp = ydb_db_layer_status; } -static void -create_iname_hint(const char *dname, char *hint) { +void create_iname_hint(const char *dname, char *hint) { //Requires: size of hint array must be > strlen(dname) //Copy alphanumeric characters only. //Replace strings of non-alphanumeric characters with a single underscore. @@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) { *hint = '\0'; } +void create_iname_hint_for_dbdir(const char *dname, char *hint) { + assert(dname); + if (*dname == '.') + ++dname; + if (*dname == '/') + ++dname; + bool underscored = false; + bool dbdir_is_parsed = false; + // Do not change the first '/' because this is + // delimiter which splits name into database dir + // and table dir. 
+ while (*dname) { + if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) { + char c = *dname++; + *hint++ = c; + if (c == '/') + dbdir_is_parsed = true; + underscored = false; + } else { + if (!underscored) + *hint++ = '_'; + dname++; + underscored = true; + } + } + *hint = '\0'; +} + // n < 0 means to ignore mark and ignore n // n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname // (intended for use by loader, which will create many inames using one txnid). -static char * -create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) { +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n) { int bytes; char inamebase[strlen(hint) + 8 + // hex file format version @@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma return rval; } +static uint64_t nontransactional_open_id = 0; + +std::unique_ptr generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open) { + std::unique_ptr result(nullptr, &toku_free); + char hint[strlen(dname) + 1]; + uint64_t id1 = 0; + uint64_t id2 = 0; + + if (txn) { + id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; + id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; + } else if (is_open) + id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); + + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); + + result.reset(create_iname(env, id1, id2, hint, NULL, -1)); + + return result; +} + static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode); // Effect: Do the work required of DB->close(). 
@@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY return r; } -static uint64_t nontransactional_open_id = 0; - // inames are created here. // algorithm: // begin txn @@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC); r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname - char *iname = (char *) iname_dbt.data; + std::unique_ptr iname( + static_cast(iname_dbt.data), &toku_free); if (r == DB_NOTFOUND && !is_db_create) { r = ENOENT; } else if (r==0 && is_db_excl) { r = EEXIST; } else if (r == DB_NOTFOUND) { - char hint[strlen(dname) + 1]; - - // create iname and make entry in directory - uint64_t id1 = 0; - uint64_t id2 = 0; - - if (txn) { - id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; - id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; - } else { - id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); - } - create_iname_hint(dname, hint); - iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname - toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1); + iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true); + toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1); // // put_flags will be 0 for performance only, avoid unnecessary query // if we are creating a hot index, per #3166, we do not want the write lock in directory grabbed. 
@@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // we now have an iname if (r == 0) { - r = toku_db_open_iname(db, txn, iname, flags, mode); + r = toku_db_open_iname(db, txn, iname.get(), flags, mode); if (r == 0) { db->i->dname = toku_xstrdup(dname); env_note_db_opened(db->dbenv, db); // tell env that a new db handle is open (using dname) } } - if (iname) { - toku_free(iname); - } return r; } @@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); // now create new iname char hint[strlen(dname) + 1]; - create_iname_hint(dname, hint); + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env new_inames_in_env[i] = new_iname; toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h index 8b92dd1c3cb..8be28857c14 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.h +++ b/storage/tokudb/PerconaFT/src/ydb_db.h @@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "ydb-internal.h" #include "ydb_txn.h" +#include <memory> + typedef enum { YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */ YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */ @@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { } return r; } + +void create_iname_hint_for_dbdir(const char *dname, char *hint); +void create_iname_hint(const char *dname, char *hint); +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n); +std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open); diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 2b121189e83..5e49e8d95d0 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -543,6 +543,7 @@ static int tokudb_init_func(void *p) { db_env->change_fsync_log_period(db_env, tokudb::sysvars::fsync_log_period); db_env->set_lock_timeout_callback(db_env, tokudb_lock_timeout_callback); + db_env->set_dir_per_db(db_env, tokudb::sysvars::dir_per_db); db_env->set_loader_memory_size( db_env, diff --git a/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result new file mode 100644 index 00000000000..a36dbcb28c0 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result @@ -0,0 +1,10 @@ +SELECT @@tokudb_dir_per_db; +@@tokudb_dir_per_db +1 +TOKUDB_DATA_DIR_CHANGED +1 +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result new file mode 100644 index 00000000000..371f97406c8 --- /dev/null +++ 
b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result @@ -0,0 +1,180 @@ +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in 
data_dir/test +######## +# CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +######## +######## +# tokudb_dir_per_db = (1 - 1); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 1);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = (1 - 0); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 0);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test 
+t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result index 6f9592ddc1f..ecd4d077206 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -2,6 +2,7 @@ set default_storage_engine='tokudb'; set tokudb_prelock_empty=false; drop table if exists t; create table t (id int primary key); +t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -15,17 +16,21 @@ insert into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; insert into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that the lock on the 1st transaction is released 
and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main @@ -33,6 +38,8 @@ select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; +verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -46,23 +53,28 @@ replace into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; replace into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that 
the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result new file mode 100644 index 00000000000..cb669148445 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result @@ -0,0 +1,51 @@ +CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB 
+tokudb_row_format_test_2 tokudb_quicklz TokuDB +tokudb_row_format_test_3 tokudb_lzma TokuDB +tokudb_row_format_test_4 tokudb_uncompressed TokuDB +tokudb_row_format_test_5 tokudb_zlib TokuDB +tokudb_row_format_test_6 tokudb_lzma TokuDB +tokudb_row_format_test_7 tokudb_quicklz TokuDB +tokudb_row_format_test_8 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_uncompressed TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE 
tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt new file mode 100644 index 00000000000..a9090f4d115 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt @@ -0,0 +1 @@ +--loose-tokudb_data_dir="$MYSQL_TMP_DIR" --loose-tokudb-dir-per-db=1 diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test new file mode 100644 index 00000000000..7f415a72515 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test @@ -0,0 +1,16 @@ +--source include/have_tokudb.inc + +SELECT @@tokudb_dir_per_db; + +--disable_query_log +--eval SELECT STRCMP(@@tokudb_data_dir, '$MYSQL_TMP_DIR') = 0 AS TOKUDB_DATA_DIR_CHANGED +--enable_query_log + +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; + +--file_exists $MYSQL_TMP_DIR/tokudb_test + +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test 
new file mode 100644 index 00000000000..b638b706d87 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test @@ -0,0 +1,76 @@ +source include/have_tokudb.inc; + +--let $DB= test +--let $DATADIR= `select @@datadir` +--let $i= 2 + +while ($i) { + --dec $i + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE t2; + --source dir_per_db_show_table_files.inc +} + +--echo ######## +--echo # CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +--echo ######## + +--let $i= 2 + +while ($i) { + --dec $i + --let $inv_i= (1 - $i); + --echo ######## + --echo # tokudb_dir_per_db = $inv_i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $inv_i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE 
t2; + --source dir_per_db_show_table_files.inc +} + +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc new file mode 100644 index 00000000000..bdf7d5b235f --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc @@ -0,0 +1,9 @@ +--sorted_result + +--echo ## Looking for *.tokudb files in data_dir +--source include/table_files_replace_pattern.inc +--list_files $DATADIR *.tokudb + +--echo ## Looking for *.tokudb files in data_dir/$DB +--source include/table_files_replace_pattern.inc +--list_files $DATADIR/$DB/ *.tokudb diff --git a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test index 6488f27cfbb..924b11e29d6 100644 --- a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test +++ b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test @@ -12,7 +12,7 @@ create table t (id int primary key); # verify that txn_a insert (1) blocks txn_b insert (1) and txn_b gets a duplicate key error -# should be empty +--echo t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -28,7 +28,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send insert into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='insert into t values (1)' and state='update'; source include/wait_condition.inc; @@ -37,17 +37,17 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from 
information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; source include/wait_condition.inc; @@ -64,10 +64,8 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes - -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -83,7 +81,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send replace into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='replace into t values (1)' and state='update'; source include/wait_condition.inc; 
@@ -92,17 +90,19 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; +source include/wait_condition.inc; replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -115,8 +115,7 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test new file mode 100644 index 00000000000..6533f8c06be --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test @@ -0,0 +1,41 @@ +# +# Test TokuDB compression option additions to row_format +# +--source include/have_tokudb.inc + +CREATE TABLE 
tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; + +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 
'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result index 6ec3a2c8079..30e0bdbebd7 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result @@ -23,6 +23,7 @@ set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR d TRUNCATE TABLE t1; set global tokudb_debug_pause_background_job_manager = FALSE; set DEBUG_SYNC = 'now SIGNAL done'; +set DEBUG_SYNC = 'RESET'; drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; set session tokudb_analyze_in_background = @orig_in_background; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test index f1912faad02..50434a79a00 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test @@ -40,6 +40,7 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status; # lets flip to another connection +--source include/count_sessions.inc connect(conn1, localhost, root); # set up the DEBUG_SYNC point @@ -64,6 +65,7 @@ connection conn1; reap; connection default; disconnect conn1; +set DEBUG_SYNC = 'RESET'; 
drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; @@ -74,3 +76,4 @@ set session tokudb_analyze_time = @orig_time; set global tokudb_cardinality_scale_percent = @orig_scale_percent; set session default_storage_engine = @orig_default_storage_engine; set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager; +--source include/wait_until_count_sessions.inc diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test index be14d8814f0..f97235a0a2d 100644 --- a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test +++ b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test @@ -56,7 +56,7 @@ partition by range (a) insert into t1 values (1), (11), (21), (33); SELECT * FROM t1; SHOW CREATE TABLE t1; ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open'; @@ -82,7 +82,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO disconnect con1; connection default; --reap ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SHOW CREATE TABLE t1; SELECT * FROM t1; diff --git a/storage/tokudb/tokudb_sysvars.cc b/storage/tokudb/tokudb_sysvars.cc index 84f1c873a26..e5185615279 100644 --- a/storage/tokudb/tokudb_sysvars.cc +++ b/storage/tokudb/tokudb_sysvars.cc @@ -66,6 +66,7 @@ uint read_status_frequency = 0; my_bool strip_frm_data = FALSE; char* tmp_dir = NULL; uint write_status_frequency = 0; +my_bool dir_per_db = FALSE; char* version = (char*) TOKUDB_VERSION_STR; // file system reserve as a percentage of total disk space @@ -394,6 +395,18 @@ static MYSQL_SYSVAR_UINT( ~0U, 0); +static void tokudb_dir_per_db_update(THD* thd, + struct st_mysql_sys_var* sys_var, + void* var, const void* save) { + 
my_bool *value = (my_bool *) var; + *value = *(const my_bool *) save; + db_env->set_dir_per_db(db_env, *value); +} + +static MYSQL_SYSVAR_BOOL(dir_per_db, dir_per_db, + 0, "TokuDB store ft files in db directories", + NULL, tokudb_dir_per_db_update, FALSE); + #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL static MYSQL_SYSVAR_STR( gdb_path, @@ -935,6 +948,7 @@ st_mysql_sys_var* system_variables[] = { MYSQL_SYSVAR(tmp_dir), MYSQL_SYSVAR(version), MYSQL_SYSVAR(write_status_frequency), + MYSQL_SYSVAR(dir_per_db), #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL MYSQL_SYSVAR(gdb_path), diff --git a/storage/tokudb/tokudb_sysvars.h b/storage/tokudb/tokudb_sysvars.h index 70784fdcae3..c446e212570 100644 --- a/storage/tokudb/tokudb_sysvars.h +++ b/storage/tokudb/tokudb_sysvars.h @@ -81,6 +81,7 @@ extern uint read_status_frequency; extern my_bool strip_frm_data; extern char* tmp_dir; extern uint write_status_frequency; +extern my_bool dir_per_db; extern char* version; #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL From 82ab92bd66eaaf951d49082a5c142759da59b137 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Tue, 25 Oct 2016 22:35:35 +0000 Subject: [PATCH 84/96] MDEV-10951 Field_newdate::cmp access violation The crash is caused by macro uint3korr() accessing memory (1 byte) past the end of allocated page. The macro is written such that it reads 4 bytes instead of 3 and discards the value of the last byte. However, it is not always guaranteed that all uint3korr accesses will be valid (i.e. that the caller allocates an extra byte after the value). In particular, the tree in Item_func_group_concat does not account for any extra bytes that it would need for comparison of keys in some cases (Field_newdate::cmp, Field_medium::cmp) The fix changes uint3korr so it does not access extra bytes. 
--- include/byte_order_generic_x86.h | 10 ---------- include/byte_order_generic_x86_64.h | 8 -------- 2 files changed, 18 deletions(-) diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h index 0a71a17829b..a97dd0f43a3 100644 --- a/include/byte_order_generic_x86.h +++ b/include/byte_order_generic_x86.h @@ -27,19 +27,9 @@ ((uint32) (uchar) (A)[0]))) #define sint4korr(A) (*((const long *) (A))) #define uint2korr(A) (*((const uint16 *) (A))) - -/* - Attention: Please, note, uint3korr reads 4 bytes (not 3)! - It means, that you have to provide enough allocated space. -*/ -#if defined(HAVE_valgrind) && !defined(_WIN32) #define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ (((uint32) ((uchar) (A)[2])) << 16)) -#else -#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF) -#endif - #define uint4korr(A) (*((const uint32 *) (A))) #define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h index b6b0c5d8ea5..8c7493965a9 100644 --- a/include/byte_order_generic_x86_64.h +++ b/include/byte_order_generic_x86_64.h @@ -27,17 +27,9 @@ ((uint32) (uchar) (A)[0]))) #define sint4korr(A) (int32) (*((int32 *) (A))) #define uint2korr(A) (uint16) (*((uint16 *) (A))) -/* - Attention: Please, note, uint3korr reads 4 bytes (not 3)! - It means, that you have to provide enough allocated space. 
-*/ -#if defined(HAVE_valgrind) && !defined(_WIN32) #define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ (((uint32) ((uchar) (A)[2])) << 16)) -#else -#define uint3korr(A) (uint32) (*((unsigned int *) (A)) & 0xFFFFFF) -#endif #define uint4korr(A) (uint32) (*((uint32 *) (A))) #define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ From ad5b88a892d3e78c7192f5eb77094b46c600ab94 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 26 Oct 2016 09:26:34 +0000 Subject: [PATCH 85/96] Fix build error in XtraDB on Windows. coming from Percona's workaround for glibc bug http://bugs.mysql.com/bug.php?id=82886 --- storage/xtradb/os/os0thread.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc index 9729268348c..af826027efc 100644 --- a/storage/xtradb/os/os0thread.cc +++ b/storage/xtradb/os/os0thread.cc @@ -220,10 +220,19 @@ void os_thread_join( os_thread_t thread) { + /*This function is currently only used to workaround glibc bug + described in http://bugs.mysql.com/bug.php?id=82886 + + On Windows, no workarounds are necessary, all threads + are "detached" upon thread exit (handle is closed), so we do + nothing. + */ +#ifndef _WIN32 int ret MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL); /* Waiting on already-quit threads is allowed */ ut_ad(ret == 0 || ret == ESRCH); +#endif } /*****************************************************************//** From 9155cc7090998a5b28a1f502466640b08242c6e8 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 31 Aug 2016 15:57:02 +1000 Subject: [PATCH 86/96] MDEV-10292: Tokudb - PerconaFT - compile error in recent gcc The following directives to ignore warnings where in the PerconaFT build in tokudb. These generate errors when g++ ... -o xxx.so is used to compile are shared object. As these don't actually hit any warnings they have been removed. 
* -Wno-ignored-attributes * -Wno-pointer-bool-conversion Signed-off-by: Daniel Black --- storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index a7292a89d87..769bdffa5d9 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -98,9 +98,7 @@ set_cflags_if_supported( -Wno-error=address-of-array-temporary -Wno-error=tautological-constant-out-of-range-compare -Wno-error=maybe-uninitialized - -Wno-ignored-attributes -Wno-error=extern-c-compat - -Wno-pointer-bool-conversion -fno-rtti -fno-exceptions -Wno-error=nonnull-compare From a3c980b381ead0ea13df8314258c7a8d11fe5cd1 Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Mon, 24 Oct 2016 15:26:11 +0400 Subject: [PATCH 87/96] MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc() Code flow hit incorrect branch while closing table instances before removal. This branch expects thread to hold open table instance, whereas CREATE OR REPLACE doesn't actually hold open table instance. Before CREATE OR REPLACE TABLE it was impossible to hit this condition in LTM_PRELOCKED mode, thus the problem didn't expose itself during DROP TABLE or DROP DATABASE. Fixed by adjusting condition to take into account LTM_PRELOCKED mode, which can be set during CREATE OR REPLACE TABLE. 
--- mysql-test/r/create_or_replace.result | 11 +++++++++++ mysql-test/t/create_or_replace.test | 12 ++++++++++++ sql/sql_table.cc | 3 ++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/create_or_replace.result b/mysql-test/r/create_or_replace.result index 3a894e9fcb1..a43dc2eaca4 100644 --- a/mysql-test/r/create_or_replace.result +++ b/mysql-test/r/create_or_replace.result @@ -442,3 +442,14 @@ KILL QUERY con_id; ERROR 70100: Query execution was interrupted drop table t1; DROP TABLE t2; +# +# MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc() +# +CREATE TABLE t1(a INT); +CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test'; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +LOCK TABLE t1 WRITE; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +UNLOCK TABLES; +DROP FUNCTION f1; +DROP TABLE t1; diff --git a/mysql-test/t/create_or_replace.test b/mysql-test/t/create_or_replace.test index 7bba2b341c0..b37417f39d0 100644 --- a/mysql-test/t/create_or_replace.test +++ b/mysql-test/t/create_or_replace.test @@ -386,3 +386,15 @@ drop table t1; # Cleanup # DROP TABLE t2; + +--echo # +--echo # MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc() +--echo # +CREATE TABLE t1(a INT); +CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test'; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +LOCK TABLE t1 WRITE; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +UNLOCK TABLES; +DROP FUNCTION f1; +DROP TABLE t1; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 7cf31ee4fe8..050a3383612 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2464,7 +2464,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, if (table_type && table_type != view_pseudo_hton) ha_lock_engine(thd, table_type); - if (thd->locked_tables_mode) + if (thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) { if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED)) { From 
59a7bc35fc6526568e49f1087c022c5d01da088a Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich Date: Wed, 26 Oct 2016 14:09:11 +0400 Subject: [PATCH 88/96] Removed duplicate open_strategy assignments It is set in sql_yacc.yy. --- sql/sql_parse.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index cbf723c1b49..70511fcd849 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2857,12 +2857,6 @@ case SQLCOM_PREPARE: create_info.table_charset= 0; } - /* - For CREATE TABLE we should not open the table even if it exists. - If the table exists, we should either not create it or replace it - */ - lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; - /* If we are a slave, we should add OR REPLACE if we don't have IF EXISTS. This will help a slave to recover from @@ -8225,12 +8219,6 @@ bool create_table_precheck(THD *thd, TABLE_LIST *tables, if (check_fk_parent_table_access(thd, &lex->create_info, &lex->alter_info, create_table->db)) goto err; - /* - For CREATE TABLE we should not open the table even if it exists. - If the table exists, we should either not create it or replace it - */ - lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; - error= FALSE; err: From 5569ac00590ba139bbc575c20de4c682919721e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 25 Oct 2016 15:08:15 +0300 Subject: [PATCH 89/96] MDEV-11126: Crash while altering persistent virtual column Problem was that if old virtual column is computed and stored there was no check if new column is really virtual column. 
--- mysql-test/r/alter_table.result | 55 +++++++++++++++++++++++++++++++++ mysql-test/t/alter_table.test | 25 +++++++++++++++ sql/sql_table.cc | 1 + 3 files changed, 81 insertions(+) diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result index e572fdb197c..2e371ac6ae6 100644 --- a/mysql-test/r/alter_table.result +++ b/mysql-test/r/alter_table.result @@ -2021,3 +2021,58 @@ ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id); Warnings: Note 1061 Multiple primary key defined DROP TABLE t1; +# +# MDEV-11126 Crash while altering persistent virtual column +# +CREATE TABLE `tab1` ( +`id` bigint(20) NOT NULL AUTO_INCREMENT, +`field2` set('option1','option2','option3','option4') NOT NULL, +`field3` set('option1','option2','option3','option4','option5') NOT NULL, +`field4` set('option1','option2','option3','option4') NOT NULL, +`field5` varchar(32) NOT NULL, +`field6` varchar(32) NOT NULL, +`field7` varchar(32) NOT NULL, +`field8` varchar(32) NOT NULL, +`field9` int(11) NOT NULL DEFAULT '1', +`field10` varchar(16) NOT NULL, +`field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', +`v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, +PRIMARY KEY (`id`) +) DEFAULT CHARSET=latin1; +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128); +SHOW CREATE TABLE `tab1`; +Table Create Table +tab1 CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL 
DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT; +SHOW CREATE TABLE `tab1`; +Table Create Table +tab1 CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE `tab1`; diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test index 05d915ec478..d2b8a6082a6 100644 --- a/mysql-test/t/alter_table.test +++ b/mysql-test/t/alter_table.test @@ -1712,3 +1712,28 @@ CREATE TABLE t1 ( ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id); DROP TABLE t1; +--echo # +--echo # MDEV-11126 Crash while altering persistent virtual column +--echo # + +CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` 
set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, + PRIMARY KEY (`id`) +) DEFAULT CHARSET=latin1; + +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128); +SHOW CREATE TABLE `tab1`; +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT; +SHOW CREATE TABLE `tab1`; +DROP TABLE `tab1`; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 050a3383612..5d4c551d730 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -6274,6 +6274,7 @@ static bool fill_alter_inplace_info(THD *thd, (field->stored_in_db || field->vcol_info->is_in_partitioning_expr())) { if (is_equal == IS_EQUAL_NO || + !new_field->vcol_info || !field->vcol_info->is_equal(new_field->vcol_info)) ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_COLUMN_VCOL; else From 25932708b138aa89e5e9cea080e49d914f7bb724 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 26 Oct 2016 12:30:18 +0200 Subject: [PATCH 90/96] backport include/search_pattern_in_file.inc from 10.1 --- mysql-test/include/search_pattern_in_file.inc | 15 +++++++++------ mysql-test/r/lowercase_fs_on.result | 1 + mysql-test/r/named_pipe.result | 1 + mysql-test/r/view.result | 1 + 
mysql-test/r/wait_timeout_not_windows.result | 1 + .../innodb/r/innodb-change-buffer-recovery.result | 1 + mysql-test/suite/rpl/r/rpl_checksum.result | 1 + mysql-test/suite/rpl/r/rpl_gtid_errorlog.result | 2 ++ 8 files changed, 17 insertions(+), 6 deletions(-) diff --git a/mysql-test/include/search_pattern_in_file.inc b/mysql-test/include/search_pattern_in_file.inc index 0d09cdcd36e..84237026ed0 100644 --- a/mysql-test/include/search_pattern_in_file.inc +++ b/mysql-test/include/search_pattern_in_file.inc @@ -60,12 +60,12 @@ perl; use strict; - my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set"; - my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set"; - my $search_range= $ENV{'SEARCH_RANGE'}; + my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set"; + my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set"; + my $search_range= $ENV{'SEARCH_RANGE'}; my $file_content; $search_range= 50000 unless $search_range =~ /-?[0-9]+/; - open(FILE, "$search_file") or die("Unable to open '$search_file': $!\n"); + open(FILE, '<', $search_file) or die("Unable to open '$search_file': $!\n"); if ($search_range >= 0) { read(FILE, $file_content, $search_range, 0); } else { @@ -75,7 +75,10 @@ perl; read(FILE, $file_content, -$search_range, 0); } close(FILE); - if ( not $file_content =~ m{$search_pattern} ) { - die("# ERROR: The file '$search_file' does not contain the expected pattern $search_pattern\n->$file_content<-\n"); + $search_file =~ s{^.*?([^/\\]+)$}{$1}; + if ($file_content =~ m{$search_pattern}) { + print "FOUND /$search_pattern/ in $search_file\n" + } else { + print "NOT FOUND /$search_pattern/ in $search_file\n" } EOF diff --git a/mysql-test/r/lowercase_fs_on.result b/mysql-test/r/lowercase_fs_on.result index a090f46cfbf..b844b3f77dd 100644 --- a/mysql-test/r/lowercase_fs_on.result +++ b/mysql-test/r/lowercase_fs_on.result @@ -1,3 +1,4 @@ # # Bug#20198490 : LOWER_CASE_TABLE_NAMES=0 ON WINDOWS LEADS TO PROBLEMS 
# +FOUND /\[ERROR\] The server option \'lower_case_table_names\' is configured to use case sensitive table names/ in my_restart.err diff --git a/mysql-test/r/named_pipe.result b/mysql-test/r/named_pipe.result index ddd48f0ba91..43fb44beece 100644 --- a/mysql-test/r/named_pipe.result +++ b/mysql-test/r/named_pipe.result @@ -2154,3 +2154,4 @@ Privat (Private Nutzung) Mobilfunk Warnings: Warning 1052 Column 'kundentyp' in group statement is ambiguous drop table t1; +FOUND /\[ERROR\] Create named pipe failed/ in second-mysqld.err diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result index 52c379d03af..924b3a11fef 100644 --- a/mysql-test/r/view.result +++ b/mysql-test/r/view.result @@ -5432,6 +5432,7 @@ DROP FUNCTION f1; DROP VIEW v1; DROP TABLE t1, t2; create view v1 as select 1; +FOUND /mariadb-version/ in v1.frm drop view v1; # # MDEV-7260: Crash in get_best_combination when executing multi-table diff --git a/mysql-test/r/wait_timeout_not_windows.result b/mysql-test/r/wait_timeout_not_windows.result index df70aa99221..867787a8ed3 100644 --- a/mysql-test/r/wait_timeout_not_windows.result +++ b/mysql-test/r/wait_timeout_not_windows.result @@ -1,3 +1,4 @@ set global log_warnings=2; set @@wait_timeout=1; +FOUND /Aborted.*Got timeout reading communication packets/ in mysqld.1.err set global log_warnings=@@log_warnings; diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result index cc2a0373444..07e13008e27 100644 --- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result +++ b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result @@ -33,6 +33,7 @@ INSERT INTO t1 VALUES(1,'X',1); SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; SELECT b FROM t1 LIMIT 3; ERROR HY000: Lost connection to MySQL server during query +FOUND /Wrote log record for ibuf update in place operation/ in my_restart.err CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK 
diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result index 94d215e596a..9e37fbf40b1 100644 --- a/mysql-test/suite/rpl/r/rpl_checksum.result +++ b/mysql-test/suite/rpl/r/rpl_checksum.result @@ -143,6 +143,7 @@ SET debug_dbug= @old_dbug; INSERT INTO t4 VALUES (2); include/wait_for_slave_sql_error.inc [errno=1590] Last_SQL_Error = 'The incident LOST_EVENTS occurred on the master. Message: error writing to the binary log' +FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: error writing to the binary log, Internal MariaDB error code: 1590/ in mysqld.2.err SELECT * FROM t4 ORDER BY a; a 1 diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result index 204615201d9..e247ea9c2a7 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result @@ -38,5 +38,7 @@ a 3 4 5 +FOUND /Slave SQL: Error 'Duplicate entry .* on query\. .*Query: '.*', Gtid 0-1-100, Internal MariaDB error code:|Slave SQL: Could not execute Write_rows.*table test.t1; Duplicate entry.*, Gtid 0-1-100, Internal MariaDB error/ in mysqld.2.err +FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. 
Message: , Internal MariaDB error code: 1590/ in mysqld.2.err DROP TABLE t1; include/rpl_end.inc From 22490a0d709d0c53da94799accb038bf270ed411 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 26 Oct 2016 13:26:43 +0200 Subject: [PATCH 91/96] MDEV-8345 STOP SLAVE should not cause an ERROR to be logged to the error log cherry-pick from 5.7: commit 6b24763 Author: Manish Kumar Date: Tue Mar 27 13:10:42 2012 +0530 BUG#12977988 - ON STOP SLAVE: ERROR READING PACKET FROM SERVER: LOST CONNECTION TO MYSQL SERVER BUG#11761457 - ERROR 2013 + "ERROR READING RELAY LOG EVENT" ON STOP SLAVEBUG#12977988 - ON STOP SLAVE: ERROR READING PACKET FROM SERVER: LOST CONNECTION TO MYSQL SERVER --- .../suite/rpl/r/rpl_stop_slave_error.result | 6 ++++++ .../suite/rpl/t/rpl_stop_slave_error-slave.opt | 1 + .../suite/rpl/t/rpl_stop_slave_error.test | 17 +++++++++++++++++ sql/slave.cc | 9 +++++++-- 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_stop_slave_error.result create mode 100644 mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_stop_slave_error.test diff --git a/mysql-test/suite/rpl/r/rpl_stop_slave_error.result b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result new file mode 100644 index 00000000000..2bd372a9a91 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result @@ -0,0 +1,6 @@ +include/master-slave.inc +[connection master] +include/stop_slave.inc +NOT FOUND /Error reading packet from server: Lost connection/ in slave_log.err +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt new file mode 100644 index 00000000000..32c4527a915 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt @@ -0,0 +1 @@ +--log-error=$MYSQLTEST_VARDIR/tmp/slave_log.err diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error.test 
b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test new file mode 100644 index 00000000000..a88981c15c4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test @@ -0,0 +1,17 @@ +# +# MDEV-8345 STOP SLAVE should not cause an ERROR to be logged to the error log +# +source include/have_binlog_format_mixed.inc; # don't repeat the test three times +source include/master-slave.inc; + +connection master; +sync_slave_with_master; +source include/stop_slave.inc; +let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/slave_log.err; +let SEARCH_PATTERN=Error reading packet from server: Lost connection; +let SEARCH_RANGE= -50000; +source include/search_pattern_in_file.inc; + +source include/start_slave.inc; +source include/rpl_end.inc; + diff --git a/sql/slave.cc b/sql/slave.cc index 6dc1a66a2ac..a124ca6be7e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3120,8 +3120,13 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings) *suppress_warnings= TRUE; } else - sql_print_error("Error reading packet from server: %s ( server_errno=%d)", - mysql_error(mysql), mysql_errno(mysql)); + { + if (!mi->rli.abort_slave) + { + sql_print_error("Error reading packet from server: %s (server_errno=%d)", + mysql_error(mysql), mysql_errno(mysql)); + } + } DBUG_RETURN(packet_error); } From 26b87c332ff78a7aca04930ad86fbf7acc793222 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Thu, 27 Oct 2016 00:04:26 +0400 Subject: [PATCH 92/96] MDEV-10846 Running mysqldump backup twice returns error: Table 'mysql.proc' doesn't exist. The mysql_rm_db() doesn't seem to expect the 'mysql' database to be deleted. Checks for that added. Also fixed the bug MDEV-11105 Table named 'db' has weird side effect. The db.opt file now removed separately. 
--- mysql-test/r/drop.result | 6 ++++++ mysql-test/t/drop.test | 9 +++++++++ sql/sql_db.cc | 26 +++++++++++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result index c23ffbe327b..c25ae9e3055 100644 --- a/mysql-test/r/drop.result +++ b/mysql-test/r/drop.result @@ -209,3 +209,9 @@ INSERT INTO table1 VALUES (1); ERROR 42S02: Unknown table 't.notable' DROP TABLE table1,table2; # End BUG#34750 +# +# MDEV-11105 Table named 'db' has weird side effect. +# +CREATE DATABASE mysqltest; +CREATE TABLE mysqltest.db(id INT); +DROP DATABASE mysqltest; diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test index d9784bc819a..a3e96953bac 100644 --- a/mysql-test/t/drop.test +++ b/mysql-test/t/drop.test @@ -313,3 +313,12 @@ INSERT INTO table1 VALUES (1); DROP TABLE table1,table2; --echo # End BUG#34750 + +--echo # +--echo # MDEV-11105 Table named 'db' has weird side effect. +--echo # + +CREATE DATABASE mysqltest; +CREATE TABLE mysqltest.db(id INT); +DROP DATABASE mysqltest; + diff --git a/sql/sql_db.cc b/sql/sql_db.cc index e89c3d9e745..0a3ff64113f 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -784,7 +784,7 @@ exit: bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) { ulong deleted_tables= 0; - bool error= true; + bool error= true, rm_mysql_schema; char path[FN_REFLEN + 16]; MY_DIR *dirp; uint length; @@ -809,6 +809,18 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) length= build_table_filename(path, sizeof(path) - 1, db, "", "", 0); strmov(path+length, MY_DB_OPT_FILE); // Append db option file name del_dbopt(path); // Remove dboption hash entry + /* + Now remove the db.opt file. + The 'find_db_tables_and_rm_known_files' doesn't remove this file + if there exists a table with the name 'db', so let's just do it + separately. We know this file exists and needs to be deleted anyway. 
+ */ + if (my_delete_with_symlink(path, MYF(0)) && my_errno != ENOENT) + { + my_error(EE_DELETE, MYF(0), path, my_errno); + DBUG_RETURN(true); + } + path[length]= '\0'; // Remove file name /* See if the directory exists */ @@ -835,7 +847,8 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) Disable drop of enabled log tables, must be done before name locking. This check is only needed if we are dropping the "mysql" database. */ - if ((my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0)) + if ((rm_mysql_schema= + (my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0))) { for (table= tables; table; table= table->next_local) if (check_if_log_table(table, TRUE, "DROP")) @@ -848,7 +861,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) lock_db_routines(thd, dbnorm)) goto exit; - if (!in_bootstrap) + if (!in_bootstrap && !rm_mysql_schema) { for (table= tables; table; table= table->next_local) { @@ -893,10 +906,13 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) ha_drop_database(path); tmp_disable_binlog(thd); query_cache_invalidate1(thd, dbnorm); - (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */ + if (!rm_mysql_schema) + { + (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */ #ifdef HAVE_EVENT_SCHEDULER - Events::drop_schema_events(thd, dbnorm); + Events::drop_schema_events(thd, dbnorm); #endif + } reenable_binlog(thd); /* From 9d4a0dde0ae3e0d46b4c5c0967c25862d467e94e Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 24 Oct 2016 10:15:11 -0700 Subject: [PATCH 93/96] Fixed bug mdev-11096. 1. When min/max value is provided the null flag for it must be set to 0 in the bitmap Culumn_statistics::column_stat_nulls. 2. When the calculation of the selectivity of the range condition over a column requires min and max values for the column then we have to check that these values are provided. 
--- mysql-test/r/selectivity.result | 24 ++++++++++++++++++++++ mysql-test/r/selectivity_innodb.result | 28 ++++++++++++++++++++++++-- mysql-test/t/selectivity.test | 21 ++++++++++++++++++- sql/sql_statistics.cc | 15 ++++---------- sql/sql_statistics.h | 5 +++++ 5 files changed, 79 insertions(+), 14 deletions(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 620bdc6bd50..c2364e11ceb 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1446,3 +1446,27 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 0acbb465ba8..882f51515b2 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -802,9 +802,9 @@ insert into t2 values (2),(3); explain extended select * from t1 where a in ( select b 
from t2 ) AND ( a > 3 ); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 1 0.00 Using where +1 PRIMARY t1 ALL NULL NULL NULL NULL 1 100.00 Using where 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 -2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 0.00 +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 100.00 Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3)) select * from t1 where a in ( select b from t2 ) AND ( a > 3 ); @@ -1450,6 +1450,30 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; set @tmp_oucs= @@optimizer_use_condition_selectivity; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index c46ff69295f..1321046009e 100644 --- 
a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -970,6 +970,25 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit DROP TABLE t1,t2; - +set use_stat_tables=@save_use_stat_tables; + +--echo # +--echo # Bug mdev-11096: range condition over column without statistical data +--echo # + +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; + +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); + +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +select * from t1 where col1 > 'b' and col1 < 'e'; + +drop table t1; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 47a5a40ebeb..70080a6b4f1 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1003,11 +1003,13 @@ public: switch (i) { case COLUMN_STAT_MIN_VALUE: + table_field->read_stats->min_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->min_value->store(val.ptr(), val.length(), &my_charset_bin); break; case COLUMN_STAT_MAX_VALUE: + table_field->read_stats->max_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->max_value->store(val.ptr(), val.length(), &my_charset_bin); @@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field, { double avg_frequency= col_stats->get_avg_frequency(); res= avg_frequency; - /* - psergey-todo: what does check for min_value, max_value mean? - min/max_value are set to NULL in alloc_statistics_for_table() and - alloc_statistics_for_table_share(). Both functions will immediately - call create_min_max_statistical_fields_for_table and - create_min_max_statistical_fields_for_table_share() respectively, - which will set min/max_value to be valid pointers, unless OOM - occurs. 
- */ if (avg_frequency > 1.0 + 0.000001 && - col_stats->min_value && col_stats->max_value) + col_stats->min_max_values_are_provided()) { Histogram *hist= &col_stats->histogram; if (hist->is_available()) @@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field, } else { - if (col_stats->min_value && col_stats->max_value) + if (col_stats->min_max_values_are_provided()) { double sel, min_mp_pos, max_mp_pos; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 46e5cef22d1..8e5f8107849 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -388,6 +388,11 @@ public: avg_frequency= (ulong) (val * Scale_factor_avg_frequency); } + bool min_max_values_are_provided() + { + return !is_null(COLUMN_STAT_MIN_VALUE) && + !is_null(COLUMN_STAT_MIN_VALUE); + } }; From d451d772fdaa554eeb96ae12f96c3a32a6fd4d66 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Wed, 26 Oct 2016 10:59:38 -0700 Subject: [PATCH 94/96] Fixed bug mdev-9628. In the function create_key_parts_for_pseudo_indexes() the key part structures of pseudo-indexes created for BLOB fields were set incorrectly. Also the key parts for long fields must be 'truncated' up to the maximum length acceptable for key parts. 
--- mysql-test/r/selectivity.result | 47 ++++++++++++++ mysql-test/r/selectivity_innodb.result | 85 ++++++++++++++++++++++++++ mysql-test/t/selectivity.test | 33 ++++++++++ mysql-test/t/selectivity_innodb.test | 25 ++++++++ sql/opt_range.cc | 9 ++- 5 files changed, 198 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index c2364e11ceb..8fb5cd17c51 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1470,3 +1470,50 @@ d drop table t1; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-9628: unindexed blob column without min-max statistics +# with optimizer_use_condition_selectivity=3 +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +select * from t1 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 28.57 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd')) +select * from t2 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 
Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd')) +select * from t2 where col1 < 'b' and col1 > 'd'; +col1 +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0 +drop table t1,t2; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 882f51515b2..3d15131dbb5 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -1474,6 +1474,53 @@ d drop table t1; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-9628: unindexed blob column without min-max statistics +# with optimizer_use_condition_selectivity=3 +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +select * from t1 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 
8 28.57 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd')) +select * from t2 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd')) +select * from t2 where col1 < 'b' and col1 > 'd'; +col1 +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0 +drop table t1,t2; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; set @tmp_oucs= @@optimizer_use_condition_selectivity; @@ -1560,6 +1607,44 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na parent_id child_group_id child_user_id id lower_group_name directory_id id drop table t1,t2,t3; # +# MDEV-9187: duplicate of bug mdev-9628 +# +set use_stat_tables = preferably; +set optimizer_use_condition_selectivity=3; +CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB; +INSERT INTO t1 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT * FROM t1 WHERE f1 < 'm'; +f1 +foo +bar +EXPLAIN EXTENDED +SELECT * FROM t1 WHERE f1 < 'm'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE 
t1 ALL NULL NULL NULL NULL 3 72.09 Using where +Warnings: +Note 1003 select `test`.`t1`.`f1` AS `f1` from `test`.`t1` where (`test`.`t1`.`f1` < 'm') +CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB; +INSERT INTO t2 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +SELECT * FROM t2 WHERE f1 <> 'qux'; +f1 +foo +bar +EXPLAIN EXTENDED +SELECT * FROM t2 WHERE f1 <> 'qux'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 100.00 Using where +Warnings: +Note 1003 select `test`.`t2`.`f1` AS `f1` from `test`.`t2` where (`test`.`t2`.`f1` <> 'qux') +DROP TABLE t1,t2; +# # End of 10.0 tests # set use_stat_tables= @tmp_ust; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index 1321046009e..8efc5216ba0 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -992,3 +992,36 @@ drop table t1; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; +--echo # +--echo # Bug mdev-9628: unindexed blob column without min-max statistics +--echo # with optimizer_use_condition_selectivity=3 +--echo # + +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; + +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; + +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; + +select * from t1 where col1 > 'b' and col1 < 'd'; +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; + +select * from t2 where col1 > 'b' and col1 < 'd'; +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; + +select * from t2 where col1 < 'b' and col1 > 'd'; +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; + +drop 
table t1,t2; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; + diff --git a/mysql-test/t/selectivity_innodb.test b/mysql-test/t/selectivity_innodb.test index d6a77eac600..25aa0abbc3b 100644 --- a/mysql-test/t/selectivity_innodb.test +++ b/mysql-test/t/selectivity_innodb.test @@ -109,6 +109,31 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na drop table t1,t2,t3; +--echo # +--echo # MDEV-9187: duplicate of bug mdev-9628 +--echo # + +set use_stat_tables = preferably; +set optimizer_use_condition_selectivity=3; + +CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB; +INSERT INTO t1 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t1; + +SELECT * FROM t1 WHERE f1 < 'm'; +EXPLAIN EXTENDED +SELECT * FROM t1 WHERE f1 < 'm'; + +CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB; +INSERT INTO t2 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t2; + +SELECT * FROM t2 WHERE f1 <> 'qux'; +EXPLAIN EXTENDED +SELECT * FROM t2 WHERE f1 <> 'qux'; + +DROP TABLE t1,t2; + --echo # --echo # End of 10.0 tests --echo # diff --git a/sql/opt_range.cc b/sql/opt_range.cc index e0ca43e6d72..5d6891a1edf 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3345,9 +3345,16 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, { Field *field= *field_ptr; uint16 store_length; + uint16 max_key_part_length= (uint16) table->file->max_key_part_length(); key_part->key= keys; key_part->part= 0; - key_part->length= (uint16) field->key_length(); + if (field->flags & BLOB_FLAG) + key_part->length= max_key_part_length; + else + { + key_part->length= (uint16) field->key_length(); + set_if_smaller(key_part->length, max_key_part_length); + } store_length= key_part->length; if (field->real_maybe_null()) store_length+= HA_KEY_NULL_LENGTH; From a0795655ab8d1cbcd88a155ba72ebf93864f82dc Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 27 Oct 2016 12:23:31 +0200 Subject: [PATCH 95/96] MDEV-10846 
Running mysqldump backup twice returns error: Table 'mysql.proc' doesn't exist. Update test results after 26b87c3 --- mysql-test/r/mysqldump.result | 3 --- 1 file changed, 3 deletions(-) diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result index b6de51c8b03..cb3c28f42cd 100644 --- a/mysql-test/r/mysqldump.result +++ b/mysql-test/r/mysqldump.result @@ -5236,9 +5236,6 @@ SET @@global.log_output="TABLE"; SET @@global.general_log='OFF'; SET @@global.slow_query_log='OFF'; DROP DATABASE mysql; -Warnings: -Error 1146 Table 'mysql.proc' doesn't exist -Error 1146 Table 'mysql.event' doesn't exist SHOW CREATE TABLE mysql.general_log; Table Create Table general_log CREATE TABLE `general_log` ( From eca8c324e9a02f530853580991b11b587f54b24a Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Thu, 27 Oct 2016 19:07:55 +0200 Subject: [PATCH 96/96] Typo fixed. --- sql/item_subselect.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 5cdfa427997..e70922bb5d3 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -2620,8 +2620,8 @@ static bool check_equality_for_exist2in(Item_func *func, args[0]->all_used_tables() == OUTER_REF_TABLE_BIT) { /* It is Item_field or Item_direct_view_ref) */ - DBUG_ASSERT(args[0]->type() == Item::FIELD_ITEM || - args[0]->type() == Item::REF_ITEM); + DBUG_ASSERT(args[1]->type() == Item::FIELD_ITEM || + args[1]->type() == Item::REF_ITEM); *local_field= (Item_ident *)args[1]; *outer_exp= args[0]; return TRUE;