From 7e6b033507b783ff9b08ce17b2082e8dd5df7e8c Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Fri, 10 Sep 2021 18:10:54 +0200
Subject: [PATCH 01/29] Fix MYSQL_MAINTAINER_MODE=ERR, on Windows, with Ninja, in 10.2

The conversion warning C4267, which we want to disable prior to 10.3, was
suppressed by the CMake Visual Studio generator for both C++ and C, despite
being set only in the CXX flags. The fix is to disable the warning in the
C flags, too. In 10.2 this warning is noisy; in 10.3 it is fixed.
---
 cmake/os/Windows.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/os/Windows.cmake b/cmake/os/Windows.cmake
index 1eae92924a6..75a0d71cc2d 100644
--- a/cmake/os/Windows.cmake
+++ b/cmake/os/Windows.cmake
@@ -150,6 +150,7 @@ IF(MSVC)
   IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
     # Temporarily disable size_t warnings, due to their amount
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267")
   ENDIF()
   IF(MYSQL_MAINTAINER_MODE MATCHES "ERR")
     SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX")

From 879e21b68c34d252507298a53debb2c007a38177 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Fri, 10 Sep 2021 19:32:56 +0200
Subject: [PATCH 02/29] Define minbuild target for 10.2

---
 CMakeLists.txt | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3930c5811f..67216e0e443 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -523,3 +523,40 @@ IF(NON_DISTRIBUTABLE_WARNING)
   MESSAGE(WARNING "
 You have linked MariaDB with ${NON_DISTRIBUTABLE_WARNING} libraries! You may not distribute the resulting binary. If you do, you will put yourself into a legal problem with the Free Software Foundation.")
 ENDIF()
+
+IF(NOT WITHOUT_SERVER)
+  # Define target for minimal mtr-testable build
+  ADD_CUSTOM_TARGET(minbuild)
+  ADD_DEPENDENCIES(minbuild
+    aria_chk
+    aria_pack
+    mysql
+    mysqladmin
+    mysqlbinlog
+    mysqlcheck
+    mysql_client_test
+    mysqldump
+    mysqlimport
+    mysql_plugin
+    mysqlshow
+    mysqlslap
+    mysqltest
+    mysql_tzinfo_to_sql
+    mysql_upgrade
+    mysqld
+    my_print_defaults
+    my_safe_process
+    myisam_ftdump
+    myisamchk
+    myisamlog
+    myisampack
+    perror
+    replace)
+  IF(WIN32)
+    ADD_DEPENDENCIES(minbuild echo my_safe_kill)
+  ENDIF()
+  ADD_CUSTOM_TARGET(smoketest
+    COMMAND perl ./mysql-test-run.pl main.1st
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/mysql-test)
+  ADD_DEPENDENCIES(smoketest minbuild)
+ENDIF()

From 3504f70f7f2017a98b929f182006b72ea494e1c0 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Sat, 11 Sep 2021 13:08:13 +0200
Subject: [PATCH 03/29] Bison 3.7 - fix "conversion from 'ptrdiff_t' to 'ulong', possible loss of data"

---
 sql/sql_parse.cc | 4 ++--
 sql/sql_yacc.yy  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index f782c5c25e7..3e1f248b082 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -7226,10 +7226,10 @@ bool check_stack_overrun(THD *thd, long margin,
 #define MY_YACC_INIT 1000			// Start with big alloc
 #define MY_YACC_MAX  32000			// Because of 'short'
 
-bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, ulong *yystacksize)
+bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, size_t *yystacksize)
 {
   Yacc_state *state= & current_thd->m_parser_state->m_yacc;
-  ulong old_info=0;
+  size_t old_info=0;
   DBUG_ASSERT(state);
   if ((uint) *yystacksize >= MY_YACC_MAX)
     return 1;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index ec82c7f9f07..c9dbc6fa8cc 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -76,7 +76,7 @@ int yylex(void *yylval, void *yythd);
 #define yyoverflow(A,B,C,D,E,F)               \
   {                                           \
-    ulong val= *(F);                          \
+    size_t val= *(F);                         \
     if (my_yyoverflow((B), (D), &val))        \
     {                                         \
       yyerror(thd, (char*) (A));              \
@@ -1024,7 +1024,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr)
 }
 
 %{
-bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
+bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
 %}
 
 %pure-parser                                    /* We have threads */

From f34517237912d5cb3a6a8d36c0fb91b99f2a1715 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Fri, 10 Sep 2021 21:40:19 +0200
Subject: [PATCH 04/29] MDEV-26527 speedup appveyor build - 10.2

---
 appveyor.yml | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 4a6a49fd8b5..f5f1b57735a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,21 +1,30 @@
 version: build-{build}~branch-{branch}
 
-before_build:
-  - md %APPVEYOR_BUILD_FOLDER%\win_build
-  - cd %APPVEYOR_BUILD_FOLDER%\win_build
-  - cmake .. -G "Visual Studio 15 2017 Win64" -DWITH_UNIT_TESTS=0 -DWITH_MARIABACKUP=0 -DMYSQL_MAINTAINER_MODE=ERR -DPLUGIN_ROCKSDB=NO -DPLUGIN_CONNECT=NO -DBISON_EXECUTABLE=C:\cygwin64\bin\bison
+clone_depth: 1
 
-build:
-  project: win_build\MySQL.sln
-  parallel: true
-  verbosity: minimal
-
-configuration: RelWithDebInfo
-platform: x64
+build_script:
+  # dump some system info
+  - echo processor='%PROCESSOR_IDENTIFIER%' , processor count= %NUMBER_OF_PROCESSORS%
+  - cd %APPVEYOR_BUILD_FOLDER%
+  # Disable unneeded submodules for the faster build
+  - git config submodule.storage/columnstore/columnstore.update none
+  - git config submodule.storage/maria/libmarias3.update none
+  - git config submodule.storage/rocksdb/rocksdb.update none
+  - git config submodule.wsrep-lib.update none
+  # Build minimal configuration
+  - mkdir _build
+  - cd _build
+  - set BUILD_TYPE=MinSizeRel
+  - set GENERATOR=-GNinja
+  - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
+  - cmake -E time cmake %GENERATOR% .. -DCMAKE_BUILD_TYPE=%BUILD_TYPE% -DMYSQL_MAINTAINER_MODE=ERR -DFAST_BUILD=1 -DBISON_EXECUTABLE=C:\cygwin64\bin\bison -DPLUGIN_PERFSCHEMA=NO -DPLUGIN_FEEDBACK=NO
+  - set /A jobs=2*%NUMBER_OF_PROCESSORS%
+  - cmake -E time cmake --build . -j %jobs% --config %BUILD_TYPE% --target minbuild
 
 test_script:
-  - set PATH=%PATH%;C:\Program Files (x86)\Windows Kits\10\Debuggers\x64
-  - cd %APPVEYOR_BUILD_FOLDER%\win_build\mysql-test
-  - perl mysql-test-run.pl --force --max-test-fail=10 --parallel=4 --testcase-timeout=10 --skip-test-list=unstable-tests --suite=main
+  - set PATH=C:\Strawberry\perl\bin;%PATH%;C:\Program Files (x86)\Windows Kits\10\Debuggers\x64
+  - cd %APPVEYOR_BUILD_FOLDER%\_build\mysql-test
+  - set /A parallel=4*%NUMBER_OF_PROCESSORS%
+  - perl mysql-test-run.pl --force --max-test-fail=10 --retry=2 -parallel=%parallel% --testcase-timeout=3 --suite=main --skip-test-list=unstable-tests --mysqld=--loose-innodb-flush-log-at-trx-commit=2
 
-image: Visual Studio 2017
+image: Visual Studio 2019

From cef656b11c353897b0d7f6463c2971ae8a3b13b4 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Sat, 11 Sep 2021 16:47:30 +0200
Subject: [PATCH 05/29] Fix Windows warnings and tests for -DPLUGIN_PERFSCHEMA=NO

---
 mysql-test/main/mysql_upgrade_noengine.test | 1 +
 sql/mdl.cc                                  | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/mysql-test/main/mysql_upgrade_noengine.test b/mysql-test/main/mysql_upgrade_noengine.test
index 5b063fb3d8c..efe36c355ab 100644
--- a/mysql-test/main/mysql_upgrade_noengine.test
+++ b/mysql-test/main/mysql_upgrade_noengine.test
@@ -1,6 +1,7 @@
 #
 # MDEV-11942 BLACKHOLE is no longer active in 10.1 by default, mysql_upgrade not handling the situation
 #
+source include/mysql_upgrade_preparation.inc;
 source include/have_innodb.inc;
 source include/not_embedded.inc;
 
diff --git a/sql/mdl.cc b/sql/mdl.cc
index 0349d147023..3ce0e102d2d 100644
--- a/sql/mdl.cc
+++ b/sql/mdl.cc
@@ -2336,11 +2336,13 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout)
 
   mysql_prlock_unlock(&lock->m_rwlock);
 
+#ifdef HAVE_PSI_INTERFACE
   PSI_metadata_locker_state state __attribute__((unused));
   PSI_metadata_locker *locker= NULL;
 
   if (ticket->m_psi != NULL)
     locker= PSI_CALL_start_metadata_wait(&state, ticket->m_psi, __FILE__, __LINE__);
+#endif
 
   will_wait_for(ticket);
 
@@ -2387,8 +2389,10 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout)
 
   done_waiting_for();
 
+#ifdef HAVE_PSI_INTERFACE
   if (locker != NULL)
     PSI_CALL_end_metadata_wait(locker, 0);
+#endif
 
   if (wait_status != MDL_wait::GRANTED)
   {

From e42da92265e7c4f6dcbecbca69653f3d2eebdc00 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub
Date: Sat, 11 Sep 2021 16:48:33 +0200
Subject: [PATCH 06/29] Use mariadb- named targets for minbuild

---
 CMakeLists.txt | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a54015b2dc..591920450ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -572,20 +572,21 @@ IF(NOT WITHOUT_SERVER)
   ADD_DEPENDENCIES(minbuild
     aria_chk
     aria_pack
-    mysql
-    mysqladmin
-    mysqlbinlog
-    mysqlcheck
-    mysql_client_test
-    mysqldump
-    mysqlimport
-    mysql_plugin
-    mysqlshow
-    mysqlslap
-    mysqltest
-    mysql_tzinfo_to_sql
-    mysql_upgrade
-    mysqld
+    mariadb
+    mariadb-admin
+    mariadb-binlog
+    mariadb-check
+    mariadb-client-test
+    mariadb-conv
+    mariadb-dump
+    mariadb-import
+    mariadb-plugin
+    mariadb-show
+    mariadb-slap
+    mariadb-test
+    mariadb-tzinfo-to-sql
+    mariadb-upgrade
+    mariadbd
     my_print_defaults
     my_safe_process
     myisam_ftdump
@@ -595,7 +596,7 @@ IF(NOT WITHOUT_SERVER)
     perror
     replace)
   IF(WIN32)
-    ADD_DEPENDENCIES(minbuild echo my_safe_kill)
+    ADD_DEPENDENCIES(minbuild echo mariadb-install-db my_safe_kill)
   ENDIF()
   ADD_CUSTOM_TARGET(smoketest
     COMMAND perl ./mysql-test-run.pl main.1st

From 1a6c130c4f157434e2272bacb680efa89eb4955e Mon Sep 17 00:00:00 2001
From: Sergei Golubchik
Date: Sun, 12 Sep 2021 15:23:34 +0200
Subject: [PATCH 07/29] perfschema: use correct type for left shifts

set_item() uses 1UL << bit, so is_set_item() must do the same.

This fixes sporadic perfschema.show_aggregate failures (sporadic, because
`bit` is the thread id, so depending on how many tests were run before
perfschema.show_aggregate it can be above or below 32).
---
 storage/perfschema/pfs_engine_table.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc
index 22ed3132c6b..c0b91cbb3b0 100644
--- a/storage/perfschema/pfs_engine_table.cc
+++ b/storage/perfschema/pfs_engine_table.cc
@@ -226,7 +226,7 @@ bool PFS_table_context::is_item_set(ulong n)
 {
   ulong word= n / m_word_size;
   ulong bit= n % m_word_size;
-  return (m_map[word] & (1 << bit));
+  return (m_map[word] & (1UL << bit));
 }

From f6717c4af66c9a295c789dc30091cac6dc719350 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Mon, 13 Sep 2021 11:55:14 +0300
Subject: [PATCH 08/29] MDEV-24258 fixup: Do not update dict_table_t::n_ref_count

row_purge_remove_clust_if_poss_low(): The table identified by
SYS_INDEXES.TABLE_ID is protected by exclusive dict_sys.latch.
There is no need to touch its reference count.
---
 storage/innobase/row/row0purge.cc | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 29ceb0dabc1..d59e7760bba 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -115,8 +115,7 @@ row_purge_remove_clust_if_poss_low(
 retry:
	purge_sys.check_stop_FTS();
	dict_sys.lock(SRW_LOCK_CALL);
-	table = dict_table_open_on_id(
-		table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
+	table = dict_sys.find_table(table_id);
	if (!table) {
		dict_sys.unlock();
	} else if (table->n_rec_locks) {
@@ -141,7 +140,6 @@ removed:
	mtr.commit();
 close_and_exit:
	if (table) {
-		dict_table_close(table, true);
		dict_sys.unlock();
	}
	return success;
@@ -166,7 +164,7 @@ close_and_exit:
	if (const uint32_t space_id = dict_drop_index_tree(
		    &node->pcur, nullptr, &mtr)) {
		if (table) {
-			if (table->release()) {
+			if (table->get_ref_count() == 0) {
				dict_sys.remove(table);
			} else if (table->space_id == space_id) {
				table->space = nullptr;
@@ -181,7 +179,6 @@ close_and_exit:
	mtr.commit();

	if (table) {
-		dict_table_close(table, true);
		dict_sys.unlock();
		table = nullptr;
	}

From 07abcb5045f9914d023ca630f7a35f4fcbf18a6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Otto=20Kek=C3=A4l=C3=A4inen?=
Date: Sun, 5 Sep 2021 20:22:45 -0700
Subject: [PATCH 09/29] Deb: Fix Gitlab-CI/Salsa-CI build failures

The debian/salsa-ci.yml used to work also on upstream MariaDB.org branches,
but has recently regressed and several jobs stopped working. These fixes are
necessary to get it working again.

* Partially revert 8642f592 that never worked, as MariaDB 10.2 does not
  have a mysql.global table nor a mariadb.sys user. Those features weren't
  introduced until MariaDB 10.4.
* Partially revert 0268b871 as we don't want ColumnStore as part of the
  native Debian build. It should build only when the build is triggered
  via autobake-deb.sh (MariaDB.org builds).
* Adjust salsa-ci.yml to cope with various Stretch to Sid upgrade issues,
  and remove the legacy mariadb-connector-c job completely as that package
  hasn't been around for years.
* Extend Lintian overrides to be otherwise Lintian clean
---
 debian/autobake-deb.sh          | 10 ++++++----
 debian/rules                    | 17 +++++++++++++++--
 debian/salsa-ci.yml             | 28 +++++-----------------------
 debian/source/lintian-overrides |  6 ++++++
 4 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh
index c09ebb1d0cd..80bcabe7412 100755
--- a/debian/autobake-deb.sh
+++ b/debian/autobake-deb.sh
@@ -22,12 +22,14 @@ if [[ $TRAVIS ]] || [[ $GITLAB_CI ]]
 then
   # On both Travis and Gitlab the output log must stay under 4MB so make the
   # build less verbose
-  # MCOL-4149: ColumnStore builds are so slow and big that they must be skipped on
-  # both Travis-CI and Gitlab-CI
-  sed -e 's|$(CMAKEFLAGS)|$(CMAKEFLAGS) -DPLUGIN_COLUMNSTORE=NO|' \
-    -i debian/rules
+  sed '/Add support for verbose builds/,/^$/d' -i debian/rules
 elif [ -d storage/columnstore/columnstore/debian ]
 then
+  # ColumnStore is explicitly disabled in the native Debian build, so allow it
+  # now when build is triggered by autobake-deb.sh (MariaDB.org) and when the
+  # build is not running on Travis or Gitlab-CI
+  sed '/-DPLUGIN_COLUMNSTORE=NO/d' -i debian/rules
+
   # Take the files and part of control from MCS directory
   cp -v storage/columnstore/columnstore/debian/mariadb-plugin-columnstore.* debian/
   echo >> debian/control
   cat storage/columnstore/columnstore/debian/control >> debian/control
diff --git a/debian/rules b/debian/rules
index 650f807e0ec..df0ee3ee7b6 100755
--- a/debian/rules
+++ b/debian/rules
@@ -38,6 +38,16 @@ else
   NUMJOBS = 1
 endif
 
+# RocksDB cannot build on 32-bit platforms
+ifeq (32,$(DEB_HOST_ARCH_BITS))
+  CMAKEFLAGS += -DPLUGIN_ROCKSDB=NO
+endif
+
+# ColumnStore can build only on amd64 and arm64
+ifneq (,$(filter $(DEB_HOST_ARCH_CPU),amd64 arm64))
+  CMAKEFLAGS += -DPLUGIN_COLUMNSTORE=NO
+endif
+
 ifneq ($(DEB_BUILD_ARCH),$(DEB_HOST_ARCH))
 ifneq (,$(filter $(DEB_HOST_ARCH_CPU),alpha amd64 arm arm64 i386 ia64 m68k mips64el mipsel powerpc ppc64 ppc64el riscv64 s390x sh4 sparc64))
   CMAKEFLAGS += -DSTACK_DIRECTION=-1
@@ -72,13 +82,15 @@ endif
 
	echo "server:Version=$(DEB_VERSION)" >> debian/substvars
 
-	# RocksDB and Column Store cannot build on 32-bit platforms
+	# Don't build ColumnStore as part of the native build as it does not meet the
+	# quality standards in Debian. Also building it requires an extra 4 GB of disk
+	# space which makes native Debian builds fail as the total disk space needed
+	# for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh.
	PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \
	NO_UPDATE_BUILD_VERSION=1 \
	dh_auto_configure --builddirectory=$(BUILDDIR) -- \
	  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
	  $(CMAKEFLAGS) \
-	  $(if $(findstring $(DEB_HOST_ARCH_BITS),32),-DPLUGIN_ROCKSDB=NO -DPLUGIN_COLUMNSTORE=NO) \
	  $(if $(filter $(DEB_BUILD_ARCH),$(DEB_HOST_ARCH)),,-DIMPORT_EXECUTABLES=$(CURDIR)/builddir-native/import_executables.cmake) \
	  -DCOMPILATION_COMMENT="mariadb.org binary distribution" \
	  -DMYSQL_SERVER_SUFFIX="-$(DEB_VERSION_REVISION)" \
@@ -88,6 +100,7 @@ endif
	  -DPLUGIN_TOKUDB=NO \
	  -DPLUGIN_CASSANDRA=NO \
	  -DPLUGIN_AWS_KEY_MANAGEMENT=NO \
+	  -DPLUGIN_COLUMNSTORE=NO \
	  -DDEB=$(DEB_VENDOR)
 
 # This is needed, otherwise 'make test' will run before binaries have been built
diff --git a/debian/salsa-ci.yml b/debian/salsa-ci.yml
index 45847909192..98bcab60b8f 100644
--- a/debian/salsa-ci.yml
+++ b/debian/salsa-ci.yml
@@ -278,6 +278,8 @@ mariadb-10.1 Stretch to mariadb-10.5 upgrade:
    - apt-get install -y 'default-mysql*' 'mariadb-*' 'libmariadbd*' 'libmariadbclient*'
    # Verify installation of MariaDB from Stretch
    - *test-verify-initial
+    # Remove manpages 4.10 that conflicts with manpages-dev 5.10 on console_ioctl.4.gz
+    - apt-get remove -y manpages
    - *test-enable-sid-repos
    - *test-install
    - service mysql status
@@ -486,28 +488,8 @@ default-libmysqlclient-dev Stretch upgrade:
    - *test-prepare-container
    - apt-get install -y pkg-config default-libmysqlclient-dev
    - pkg-config --list-all
-    - *test-enable-sid-repos
-    - *test-install-all-libs
-    - *test-verify-libs
-  except:
-    variables:
-      - $CI_COMMIT_TAG != null && $SALSA_CI_ENABLE_PIPELINE_ON_TAGS !~ /^(1|yes|true)$/
-
-mariadb-connector-c Stretch upgrade:
-  stage: upgrade from Buster/Stretch
-  needs:
-    - job: build
-      artifacts: true
-  image: debian:stretch
-  artifacts:
-    when: always
-    name: "$CI_BUILD_NAME"
-    paths:
-      - ${WORKING_DIR}/debug
-  script:
-    - *test-prepare-container
-    - apt-get install -y pkg-config libmariadb2 libmariadb-dev libmariadb-dev-compat
-    - pkg-config --list-all
+    # Remove manpages 4.10 that conflicts with manpages-dev 5.10 on console_ioctl.4.gz
+    - apt-get remove -y manpages
    - *test-enable-sid-repos
    - *test-install-all-libs
    - *test-verify-libs
@@ -751,7 +733,7 @@ mariadb.org-10.2 to mariadb-10.5 upgrade:
    # prepending with --defaults-file=/etc/mysql/debian.cnf is needed in upstream 5.5–10.3
    - mysql --defaults-file=/etc/mysql/debian.cnf --skip-column-names -e "SELECT @@version, @@version_comment"
    - echo 'SHOW DATABASES;' | mysql --defaults-file=/etc/mysql/debian.cnf
-    - mysql --defaults-file=/etc/mysql/debian.cnf -e "SELECT * FROM mysql.global_priv; SHOW CREATE USER root@localhost; SHOW CREATE USER 'mariadb.sys'@localhost"
+    - mysql --defaults-file=/etc/mysql/debian.cnf -e "SELECT * FROM mysql.user; SHOW CREATE USER root@localhost"
    - mysql --defaults-file=/etc/mysql/debian.cnf -e "SELECT * FROM mysql.plugin; SHOW PLUGINS"
    - *test-install
    - service mysql status
diff --git a/debian/source/lintian-overrides b/debian/source/lintian-overrides
index e2741658428..593e88609bf 100644
--- a/debian/source/lintian-overrides
+++ b/debian/source/lintian-overrides
@@ -22,3 +22,9 @@ version-substvar-for-external-package mariadb-client-10.5 -> mysql-client-core-8
 version-substvar-for-external-package libmariadbd-dev -> libmariadbclient-dev
 # ColumnStore not used in Debian, safe to ignore. Reported upstream in https://jira.mariadb.org/browse/MDEV-24124
 source-is-missing storage/columnstore/columnstore/utils/jemalloc/libjemalloc.so.2
+# Must be fixed upstream
+source-is-missing storage/mroonga/vendor/groonga/examples/dictionary/html/js/jquery-ui-1.8.18.custom.js line 58 is 273 characters long (>256)
+# Intentional control relationships
+version-substvar-for-external-package Replaces (line 216) ${source:Version} libmariadbd-dev -> libmariadbclient-dev
+version-substvar-for-external-package Replaces (line 66) ${source:Version} libmariadb-dev -> libmysqlclient-dev
+version-substvar-for-external-package Replaces (line 66) ${source:Version} libmariadb-dev -> libmysqld-dev

From 5527fc58617d5e1f39e035ff7a10e6b7d739e742 Mon Sep 17 00:00:00 2001
From: Daniele Sciascia
Date: Tue, 7 Sep 2021 15:04:39 +0200
Subject: [PATCH 10/29] MDEV-21613 Failed to open table mysql.wsrep_streaming_log for writing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix sporadic failure for MTR test galera_sr.GCF-1018B. The test
sometimes fails due to an error that is logged to the error log
unnecessarily.

A deterministic test case (included in this patch) shows that the error
is logged when a transaction is BF aborted right before it opens the
streaming log table to perform fragment removal. When that happens, the
attempt to open the table fails and consequently an error is logged.
There is no need to log this error, as an ER_LOCK_DEADLOCK error is
returned to the client.

Reviewed-by: Jan Lindström
---
 .../suite/galera_sr/r/MDEV-21613.result      | 20 +++++++++++
 mysql-test/suite/galera_sr/t/MDEV-21613.cnf  |  7 ++++
 mysql-test/suite/galera_sr/t/MDEV-21613.test | 36 +++++++++++++++++++
 sql/wsrep_client_service.cc                  |  1 +
 sql/wsrep_schema.cc                          | 17 ++++-----
 5 files changed, 73 insertions(+), 8 deletions(-)
 create mode 100644 mysql-test/suite/galera_sr/r/MDEV-21613.result
 create mode 100644 mysql-test/suite/galera_sr/t/MDEV-21613.cnf
 create mode 100644 mysql-test/suite/galera_sr/t/MDEV-21613.test

diff --git a/mysql-test/suite/galera_sr/r/MDEV-21613.result b/mysql-test/suite/galera_sr/r/MDEV-21613.result
new file mode 100644
index 00000000000..67af2d49331
--- /dev/null
+++ b/mysql-test/suite/galera_sr/r/MDEV-21613.result
@@ -0,0 +1,20 @@
+connection node_2;
+connection node_1;
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY);
+connection node_1;
+SET SESSION wsrep_trx_fragment_size = 1;
+SET DEBUG_SYNC = "wsrep_before_fragment_removal SIGNAL fragment_removal_reached WAIT_FOR fragment_removal_continue";
+START TRANSACTION;
+INSERT INTO t1 VALUES(1), (2);
+COMMIT;
+connect node_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_ctrl;
+SET DEBUG_SYNC = "now WAIT_FOR fragment_removal_reached";
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_1a;
+TRUNCATE TABLE t1;
+connection node_1;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+connection node_ctrl;
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/galera_sr/t/MDEV-21613.cnf b/mysql-test/suite/galera_sr/t/MDEV-21613.cnf
new file mode 100644
index 00000000000..d8e3488f044
--- /dev/null
+++ b/mysql-test/suite/galera_sr/t/MDEV-21613.cnf
@@ -0,0 +1,7 @@
+# Set thread-handling as a workaround to avoid MDEV-26528.
+# The file can be removed once fixed.
+
+!include ../galera_2nodes.cnf
+
+[mysqld.1]
+thread-handling=pool-of-threads
diff --git a/mysql-test/suite/galera_sr/t/MDEV-21613.test b/mysql-test/suite/galera_sr/t/MDEV-21613.test
new file mode 100644
index 00000000000..8a1fea1b7f5
--- /dev/null
+++ b/mysql-test/suite/galera_sr/t/MDEV-21613.test
@@ -0,0 +1,36 @@
+#
+# MDEV-21613 - galera_sr.GCF-1018B MTR failed:
+# Failed to open table mysql.wsrep_streaming_log for writing
+#
+# A BF abort right before fragment removal caused this error to
+# be logged to the error log.
+#
+--source include/galera_cluster.inc
+--source include/have_debug_sync.inc
+
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY);
+
+--connection node_1
+SET SESSION wsrep_trx_fragment_size = 1;
+SET DEBUG_SYNC = "wsrep_before_fragment_removal SIGNAL fragment_removal_reached WAIT_FOR fragment_removal_continue";
+START TRANSACTION;
+INSERT INTO t1 VALUES(1), (2);
+--send COMMIT
+
+--connect node_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connection node_ctrl
+SET DEBUG_SYNC = "now WAIT_FOR fragment_removal_reached";
+
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connection node_1a
+TRUNCATE TABLE t1;
+
+--connection node_1
+--error ER_LOCK_DEADLOCK
+--reap
+
+--connection node_ctrl
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t1;
diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc
index f045e5d271a..dd993097135 100644
--- a/sql/wsrep_client_service.cc
+++ b/sql/wsrep_client_service.cc
@@ -193,6 +193,7 @@ cleanup:
 int Wsrep_client_service::remove_fragments()
 {
   DBUG_ENTER("Wsrep_client_service::remove_fragments");
+  DEBUG_SYNC(m_thd, "wsrep_before_fragment_removal");
   if (wsrep_schema->remove_fragments(m_thd,
                                      Wsrep_server_state::instance().id(),
                                      m_thd->wsrep_trx().id(),
diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc
index 5ee6468e9c1..06f0f7840ab 100644
--- a/sql/wsrep_schema.cc
+++ b/sql/wsrep_schema.cc
@@ -251,13 +251,7 @@ static int open_table(THD* thd,
   thd->lex->query_tables_own_last= 0;
 
   if (!open_n_lock_single_table(thd, &tables, tables.lock_type, flags)) {
-    if (thd->is_error()) {
-      WSREP_WARN("Can't lock table %s.%s : %d (%s)",
-                 schema_name->str, table_name->str,
-                 thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message());
-    }
     close_thread_tables(thd);
-    my_error(ER_NO_SUCH_TABLE, MYF(0), schema_name->str, table_name->str);
     DBUG_RETURN(1);
   }
 
@@ -273,8 +267,15 @@ static int open_for_write(THD* thd, const char* table_name, TABLE** table) {
   LEX_CSTRING table_str= { table_name, strlen(table_name) };
   if (Wsrep_schema_impl::open_table(thd, &schema_str, &table_str, TL_WRITE,
                                     table)) {
-    WSREP_ERROR("Failed to open table %s.%s for writing",
-                schema_str.str, table_name);
+    // No need to log an error if the query was bf aborted,
+    // thd client will get ER_LOCK_DEADLOCK in the end.
+    const bool interrupted= thd->killed ||
+      (thd->is_error() &&
+       (thd->get_stmt_da()->sql_errno() == ER_QUERY_INTERRUPTED));
+    if (!interrupted) {
+      WSREP_ERROR("Failed to open table %s.%s for writing",
+                  schema_str.str, table_name);
+    }
     return 1;
   }
   empty_record(*table);

From adaf0dde7fb205818f2ff51139a3f3114bacc527 Mon Sep 17 00:00:00 2001
From: Daniel Black
Date: Tue, 14 Sep 2021 19:07:49 +1000
Subject: [PATCH 11/29] MDEV-26601: mysys - O_TMPFILE ^ O_CREAT

Thanks to Fabian Vogt for noticing that these open flags are mutually
exclusive, and that MariaDB was opening its temporary files on tmpfs
incorrectly. As such, we clear the O_CREAT flag when opening the file
with O_TMPFILE.
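For illustration, a minimal standalone sketch of the corrected flag handling
(a hypothetical helper, not the mysys code; on Linux, open() rejects the
combination of O_CREAT and O_TMPFILE, so the flag has to be stripped first):

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

/* Open an unnamed temporary file inside `dir`. The caller-supplied flags
   must include O_RDWR or O_WRONLY; O_CREAT is cleared because it is
   invalid in combination with O_TMPFILE. */
static int open_unnamed_tmp(const char *dir, int flags)
{
  int fd= open(dir, (flags & ~O_CREAT) | O_TMPFILE | O_CLOEXEC,
               S_IRUSR | S_IWUSR);
  if (fd < 0)
    perror("open(O_TMPFILE)");
  return fd; /* the file has no name and vanishes when fd is closed */
}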
---
 mysys/mf_tempfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
index 51a3efa05ad..0f1c6d6b1bc 100644
--- a/mysys/mf_tempfile.c
+++ b/mysys/mf_tempfile.c
@@ -121,7 +121,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
    /*
      explictly don't use O_EXCL here has it has a different
      meaning with O_TMPFILE
    */
-    if ((file= open(dir, mode | O_TMPFILE | O_CLOEXEC,
+    if ((file= open(dir, (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC,
                    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) >= 0)
    {
      my_snprintf(to, FN_REFLEN, "%s/#sql/fd=%d", dir, file);

From 03e4cb2484cc302bef2886dd3d76b3acdf7f5c23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Tue, 14 Sep 2021 16:23:23 +0300
Subject: [PATCH 12/29] MDEV-24512 fixup: Remove after_task_callback

In commit ff5d306e296350e7489dd3decb01bad18d135411 we removed
dbug_after_task_callback but forgot to revert the rest of
commit bada05a88369051ee60623b006929dd728f704e8.
---
 tpool/task.cc          | 18 +-----------------
 tpool/task_group.cc    |  1 -
 tpool/tpool.h          | 14 +-------------
 tpool/tpool_generic.cc |  3 +--
 tpool/tpool_win.cc     |  3 +--
 5 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/tpool/task.cc b/tpool/task.cc
index 0b5253bc725..81ec88590ce 100644
--- a/tpool/task.cc
+++ b/tpool/task.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019, 2020, MariaDB Corporation.
+/* Copyright (C) 2019, 2021, MariaDB Corporation.
 
 This program is free software; you can redistribute itand /or modify
 it under the terms of the GNU General Public License as published by
@@ -21,21 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
 
 namespace tpool
 {
-
-#ifndef DBUG_OFF
-static callback_func_np after_task_callback;
-void set_after_task_callback(callback_func_np cb)
-{
-  after_task_callback= cb;
-}
-
-void execute_after_task_callback()
-{
-  if (after_task_callback)
-    after_task_callback();
-}
-#endif
-
 task::task(callback_func func, void* arg, task_group* group) :
   m_func(func), m_arg(arg), m_group(group) {}
 
@@ -50,7 +35,6 @@ void execute_after_task_callback()
  {
    /* Execute directly. */
    m_func(m_arg);
-    dbug_execute_after_task_callback();
    release();
  }
 }
diff --git a/tpool/task_group.cc b/tpool/task_group.cc
index 97fbb0911c8..b52fe7c0f67 100644
--- a/tpool/task_group.cc
+++ b/tpool/task_group.cc
@@ -53,7 +53,6 @@ namespace tpool
      if (t)
      {
        t->m_func(t->m_arg);
-        dbug_execute_after_task_callback();
        t->release();
      }
      lk.lock();
diff --git a/tpool/tpool.h b/tpool/tpool.h
index d33c0608959..95e4205e601 100644
--- a/tpool/tpool.h
+++ b/tpool/tpool.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019, 2020, MariaDB Corporation.
+/* Copyright (C) 2019, 2021, MariaDB Corporation.
 
 This program is free software; you can redistribute itand /or modify
 it under the terms of the GNU General Public License as published by
@@ -184,18 +184,6 @@ class thread_pool;
 
 extern aio *create_simulated_aio(thread_pool *tp);
 
-#ifndef DBUG_OFF
-/*
-  This function is useful for debugging to make sure all mutexes are released
-  inside a task callback
-*/
-void set_after_task_callback(callback_func_np cb);
-void execute_after_task_callback();
-#define dbug_execute_after_task_callback() execute_after_task_callback()
-#else
-#define dbug_execute_after_task_callback() do{}while(0)
-#endif
-
 class thread_pool
 {
 protected:
diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc
index 0c769d67c99..ecc489c3357 100644
--- a/tpool/tpool_generic.cc
+++ b/tpool/tpool_generic.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019, 2020, MariaDB Corporation.
+/* Copyright (C) 2019, 2021, MariaDB Corporation.
 
 This program is free software; you can redistribute itand /or modify
 it under the terms of the GNU General Public License as published by
@@ -311,7 +311,6 @@ public:
      return;
 
    m_callback(m_data);
-    dbug_execute_after_task_callback();
    m_running = false;
 
    if (m_pool && m_period)
diff --git a/tpool/tpool_win.cc b/tpool/tpool_win.cc
index 09fd49d9411..88168b26eff 100644
--- a/tpool/tpool_win.cc
+++ b/tpool/tpool_win.cc
@@ -1,4 +1,4 @@
-/* Copyright(C) 2019 MariaDB
+/* Copyright (C) 2019, 2021, MariaDB Corporation.
 
 This program is free software; you can redistribute itand /or modify
 it under the terms of the GNU General Public License as published by
@@ -93,7 +93,6 @@ class thread_pool_win : public thread_pool
      return;
    }
    timer->m_func(timer->m_data);
-    dbug_execute_after_task_callback();
    if (timer->m_period)
      timer->set_time(timer->m_period, timer->m_period);
  }

From 717a32154cc6de81d1a0d3a820928b78aa7b1140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Tue, 14 Sep 2021 19:05:05 +0300
Subject: [PATCH 13/29] MDEV-26356 preparation: Refactor purge_state

purge_coordinator_timer_callback(): Remove. We will have
purge_coordinator_timer invoke purge_coordinator_callback() directly.

srv_master_callback(): Invoke srv_wake_purge_thread_if_not_active()
instead of purge_coordinator_timer_callback(). That is, we will trigger
purge_coordinator_callback() once per second if there is any work to be
done.

purge_state::do_purge(): Replaces srv_do_purge(),
purge_coordinator_callback_low(), and purge_coordinator_timer_callback().
The static variables inside srv_do_purge() were moved to purge_state
data members.
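In outline, the control flow after this change looks roughly like the
following standalone sketch (the names mirror the patch, but the predicates
and the batch call are stubbed out; the real code also handles shutdown,
srv_max_purge_lag and thread-count reconfiguration):

struct coordinator_sketch
{
  unsigned n_threads= 0;     // configured purge threads, set up lazily
  unsigned n_use_threads= 0; // purge threads currently in use
  unsigned count= 0;         // batch counter for truncate frequency

  void lazy_init(unsigned configured)
  {
    if (!n_threads)
      n_threads= n_use_threads= configured;
  }

  // Former srv_do_purge() + purge_coordinator_callback_low(), with the
  // function-local statics turned into the data members above.
  void do_purge(unsigned configured)
  {
    lazy_init(configured);
    while (enabled() && !paused())
    {
      // grow n_use_threads while the history list keeps growing, shrink
      // it again when the server is otherwise idle, then run one batch
      if (!run_one_batch(n_use_threads, ++count))
        break;                 // no more work: let the timer re-arm purge
    }
  }

  bool enabled() const { return true; }   // stub for purge_sys.enabled()
  bool paused() const { return false; }   // stub for purge_sys.paused()
  bool run_one_batch(unsigned, unsigned) { return false; } // stub for trx_purge()
};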
---
 storage/innobase/srv/srv0srv.cc | 235 +++++++++++++-------------------
 1 file changed, 94 insertions(+), 141 deletions(-)

diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 1345f12a69b..fc4b744ed08 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -528,7 +528,14 @@ struct purge_coordinator_state
  /** Snapshot of the last history length before the purge call.*/
  uint32 m_history_length;
  Atomic_counter<int> m_running;
-  purge_coordinator_state() : m_history_length(), m_running(0) {}
+private:
+  ulint count;
+  ulint n_use_threads;
+  ulint n_threads;
+
+  inline void lazy_init();
+public:
+  inline void do_purge();
 };
 
 static purge_coordinator_state purge_state;
@@ -1329,7 +1336,6 @@ bool srv_any_background_activity()
 
 static void purge_worker_callback(void*);
 static void purge_coordinator_callback(void*);
-static void purge_coordinator_timer_callback(void*);
 
 static tpool::task_group purge_task_group;
 tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr,
@@ -1621,24 +1627,25 @@ void srv_shutdown(bool ibuf_merge)
 /** The periodic master task controlling the server. */
 void srv_master_callback(void*)
 {
-	static ulint	old_activity_count;
+  static ulint old_activity_count;
 
-	ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
+  ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
 
-	MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
-	ut_d(srv_master_do_disabled_loop());
-	purge_coordinator_timer_callback(nullptr);
-	ulonglong counter_time = microsecond_interval_timer();
-	srv_sync_log_buffer_in_background();
-	MONITOR_INC_TIME_IN_MICRO_SECS(
-		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+  MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
+  ut_d(srv_master_do_disabled_loop());
+  if (!purge_state.m_running)
+    srv_wake_purge_thread_if_not_active();
+  ulonglong counter_time= microsecond_interval_timer();
+  srv_sync_log_buffer_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_LOG_FLUSH_MICROSECOND,
+                                 counter_time);
 
-	if (srv_check_activity(&old_activity_count)) {
-		srv_master_do_active_tasks(counter_time);
-	} else {
-		srv_master_do_idle_tasks(counter_time);
-	}
-	srv_main_thread_op_info = "sleeping";
+  if (srv_check_activity(&old_activity_count))
+    srv_master_do_active_tasks(counter_time);
+  else
+    srv_master_do_idle_tasks(counter_time);
+
+  srv_main_thread_op_info= "sleeping";
 }
 
 /** @return whether purge should exit due to shutdown */
@@ -1711,86 +1718,88 @@ void srv_update_purge_thread_count(uint n)
 
 Atomic_counter<int> srv_purge_thread_count_changed;
 
-/** Do the actual purge operation.
-@param[in,out]	n_total_purged	total number of purged pages
-@return length of history list before the last purge batch. */
-static uint32_t srv_do_purge(ulint* n_total_purged)
+inline void purge_coordinator_state::do_purge()
 {
-	ulint		n_pages_purged;
+  ut_ad(!srv_read_only_mode);
+  lazy_init();
+  ut_ad(n_threads);
+  bool wakeup= false;
 
-	static ulint	count = 0;
-	static ulint	n_use_threads = 0;
-	static uint32_t	rseg_history_len = 0;
-	ulint		old_activity_count = srv_get_activity_count();
-	static ulint	n_threads = srv_n_purge_threads;
+  purge_coordinator_timer->disarm();
 
-	ut_a(n_threads > 0);
-	ut_ad(!srv_read_only_mode);
+  while (purge_sys.enabled() && !purge_sys.paused())
+  {
+loop:
+    wakeup= false;
+    const auto sigcount= m_running;
 
-	/* Purge until there are no more records to purge and there is
-	no change in configuration or server state. If the user has
-	configured more than one purge thread then we treat that as a
-	pool of threads and only use the extra threads if purge can't
-	keep up with updates. */
+    const auto old_activity_count= srv_sys.activity_count;
+    const auto history_size= trx_sys.history_size();
 
-	if (n_use_threads == 0) {
-		n_use_threads = n_threads;
-	}
+    if (UNIV_UNLIKELY(srv_purge_thread_count_changed))
+    {
+      /* Read the fresh value of srv_n_purge_threads, reset
+      the changed flag. Both are protected by purge_thread_count_mtx.
 
-	do {
-		if (UNIV_UNLIKELY(srv_purge_thread_count_changed)) {
-			/* Read the fresh value of srv_n_purge_threads, reset
-			the changed flag. Both variables are protected by
-			purge_thread_count_mtx.
+      This code does not run concurrently, it is executed
+      by a single purge_coordinator thread, and no races
+      involving srv_purge_thread_count_changed are possible. */
+      {
+        std::lock_guard<std::mutex> lk(purge_thread_count_mtx);
+        n_threads= n_use_threads= srv_n_purge_threads;
+        srv_purge_thread_count_changed= 0;
+      }
+    }
+    else if (history_size > m_history_length ||
+             (srv_max_purge_lag && m_history_length > srv_max_purge_lag))
+    {
+      /* dynamically adjust the purge thread based on redo log fill factor */
+      if (n_threads > n_use_threads)
+        ++n_use_threads;
+    }
+    else if (n_use_threads > 1 && old_activity_count == srv_sys.activity_count)
+      --n_use_threads;
 
-			This code does not run concurrently, it is executed
-			by a single purge_coordinator thread, and no races
-			involving srv_purge_thread_count_changed are possible.
-			*/
+    ut_ad(n_use_threads);
+    ut_ad(n_use_threads <= n_threads);
 
-			std::lock_guard<std::mutex> lk(purge_thread_count_mtx);
-			n_threads = n_use_threads = srv_n_purge_threads;
-			srv_purge_thread_count_changed = 0;
-		} else if (trx_sys.history_size_approx() > rseg_history_len
-			   || (srv_max_purge_lag > 0
-			       && rseg_history_len > srv_max_purge_lag)) {
-			/* History length is now longer than what it was
-			when we took the last snapshot. Use more threads. */
+    m_history_length= history_size;
 
-			if (n_use_threads < n_threads) {
-				++n_use_threads;
-			}
+    if (history_size &&
+        trx_purge(n_use_threads,
                  !(++count % srv_purge_rseg_truncate_frequency) ||
                  purge_sys.truncate.current))
+      continue;
 
-		} else if (srv_check_activity(&old_activity_count)
-			   && n_use_threads > 1) {
+    if (m_running == sigcount)
+    {
+      /* Purge was not woken up by srv_wake_purge_thread_if_not_active() */
 
-			/* History length same or smaller since last snapshot,
-			use fewer threads. */
+      /* The magic number 5000 is an approximation for the case where we have
+      cached undo log records which prevent truncate of rollback segments. */
+      wakeup= history_size &&
+        (history_size >= 5000 ||
+         history_size != trx_sys.history_size_approx());
+      break;
+    }
+    else if (!trx_sys.history_exists())
+      break;
 
-			--n_use_threads;
-		}
+    if (!srv_purge_should_exit())
+      goto loop;
+  }
 
-		/* Ensure that the purge threads are less than what
-		was configured. */
+  if (wakeup)
+    purge_coordinator_timer->set_time(10, 0);
 
-		ut_a(n_use_threads > 0);
-		ut_a(n_use_threads <= n_threads);
+  m_running= 0;
+}
 
-		/* Take a snapshot of the history list before purge. */
-		if (!(rseg_history_len = trx_sys.history_size())) {
-			break;
-		}
-
-		n_pages_purged = trx_purge(
-			n_use_threads,
-			!(++count % srv_purge_rseg_truncate_frequency)
-			|| purge_sys.truncate.current);
-
-		*n_total_purged += n_pages_purged;
-	} while (n_pages_purged > 0 && !purge_sys.paused()
-		 && !srv_purge_should_exit());
-
-	return(rseg_history_len);
+inline void purge_coordinator_state::lazy_init()
+{
+  if (n_threads)
+    return;
+  n_threads= n_use_threads= srv_n_purge_threads;
 }
 
@@ -1836,24 +1845,6 @@ static void release_thd(THD *thd, void *ctx)
  set_current_thd(0);
 }
 
-
-/**
-  Called by timer when purge coordinator decides
-  to delay processing of purge records.
-*/
-static void purge_coordinator_timer_callback(void *)
-{
-  if (!purge_sys.enabled() || purge_sys.paused() || purge_state.m_running)
-    return;
-
-  /* The magic number 5000 is an approximation for the case where we have
-  cached undo log records which prevent truncate of the rollback segments. */
-  if (const auto history_size= trx_sys.history_size())
-    if (purge_state.m_history_length >= 5000 ||
-        purge_state.m_history_length != history_size)
-      srv_wake_purge_thread_if_not_active();
-}
-
 static void purge_worker_callback(void*)
 {
  ut_ad(!current_thd);
@@ -1866,57 +1857,19 @@ static void purge_worker_callback(void*)
  release_thd(thd,ctx);
 }
 
-static void purge_coordinator_callback_low()
-{
-  ulint n_total_purged= ULINT_UNDEFINED;
-  purge_state.m_history_length= 0;
-
-  if (!purge_sys.enabled() || purge_sys.paused())
-    return;
-  do
-  {
-    n_total_purged = 0;
-    int sigcount= purge_state.m_running;
-
-    purge_state.m_history_length= srv_do_purge(&n_total_purged);
-
-    /* Check if purge was woken by srv_wake_purge_thread_if_not_active() */
-
-    bool woken_during_purge= purge_state.m_running > sigcount;
-
-    /* If last purge batch processed less than 1 page and there is
-    still work to do, delay the next batch by 10ms. Unless
-    someone added work and woke us up. */
-    if (n_total_purged == 0)
-    {
-      if (trx_sys.history_size() == 0)
-        return;
-      if (!woken_during_purge)
-      {
-        /* Delay next purge round*/
-        purge_coordinator_timer->set_time(10, 0);
-        return;
-      }
-    }
-  }
-  while ((purge_sys.enabled() && !purge_sys.paused()) ||
-         !srv_purge_should_exit());
-}
-
 static void purge_coordinator_callback(void*)
 {
  void *ctx;
  THD *thd= acquire_thd(&ctx);
-  purge_coordinator_callback_low();
-  release_thd(thd,ctx);
-  purge_state.m_running= 0;
+  purge_state.do_purge();
+  release_thd(thd, ctx);
 }
 
 void srv_init_purge_tasks()
 {
  purge_create_background_thds(srv_n_purge_threads);
  purge_coordinator_timer= srv_thread_pool->create_timer
-    (purge_coordinator_timer_callback, nullptr);
+    (purge_coordinator_callback, nullptr);
 }
 
 static void srv_shutdown_purge_tasks()

From ea52a3eb9795f1ae04a4f9c2be4c81e43dc86c15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Tue, 14 Sep 2021 19:06:05 +0300
Subject: [PATCH 14/29] MDEV-26356 Adaptive purge scheduling based on redo log fill factor

This should be equivalent to pull request #1889 by Krunal Bauskar.

The existing logic in purge_coordinator_state::do_purge() activates a
number of the configured innodb_purge_threads based on the history list
length. Activating more purge worker tasks should shrink the history list
faster. But, more purge workers will also generate more redo log, which
may slow down writes by user connections.

row_purge_parse_undo_rec(): Revert the work-around that was added in
commit 4690442411b75ea0fc1a6aacabe767d6fb1d1f62.

purge_coordinator_state: Keep track of the redo log fill factor (how large
a percentage of innodb_log_file_size is being occupied by log records that
were generated since the latest checkpoint). If the redo log is getting
full, log checkpoints will be triggered more frequently, and user threads
may end up waiting in log_free_check(). We try to reduce purge-induced
jitter in overall throughput by throttling down the active number of purge
tasks as the log checkpoint age is approaching the log size (in other
words, the redo log fill factor is approaching 100%).
---
 storage/innobase/row/row0purge.cc |   6 --
 storage/innobase/srv/srv0srv.cc   | 105 ++++++++++++++++++++++++++++--
 2 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index d59e7760bba..daaba10bc5a 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1035,12 +1035,6 @@ try_again:
		goto err_exit;
	}
 
-	/* FIXME: We are acquiring exclusive dict_sys.latch only to
-	avoid increased wait times in
-	trx_purge_get_next_rec() and trx_purge_truncate_history(). */
-	dict_sys.lock(SRW_LOCK_CALL);
-	dict_sys.unlock();
-
 already_locked:
	ut_ad(!node->table->is_temporary());
 
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index fc4b744ed08..59fa5ca8a12 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -533,7 +533,19 @@ private:
  ulint n_use_threads;
  ulint n_threads;
 
+  ulint lsn_lwm;
+  ulint lsn_hwm;
+  ulonglong start_time;
+  ulint lsn_age_factor;
+
+  static constexpr ulint adaptive_purge_threshold= 20;
+  static constexpr ulint safety_net= 20;
+  ulint series[innodb_purge_threads_MAX + 1];
+
+  inline void compute_series();
  inline void lazy_init();
+  void refresh(bool full);
+
 public:
  inline void do_purge();
 };
@@ -1731,8 +1743,15 @@ inline void purge_coordinator_state::do_purge()
  {
loop:
    wakeup= false;
+    const auto now= my_interval_timer();
    const auto sigcount= m_running;
 
+    if (now - start_time >= 1000000)
+    {
+      refresh(false);
+      start_time= now;
+    }
+
    const auto old_activity_count= srv_sys.activity_count;
    const auto history_size= trx_sys.history_size();
 
@@ -1749,16 +1768,37 @@ loop:
        n_threads= n_use_threads= srv_n_purge_threads;
        srv_purge_thread_count_changed= 0;
      }
+      refresh(true);
+      start_time= now;
    }
-    else if (history_size > m_history_length ||
-             (srv_max_purge_lag && m_history_length > srv_max_purge_lag))
+    else if (history_size > m_history_length)
    {
      /* dynamically adjust the purge thread based on redo log fill factor */
-      if (n_threads > n_use_threads)
+      if (n_use_threads < n_threads && lsn_age_factor < lsn_lwm)
+      {
+more_threads:
        ++n_use_threads;
+        lsn_hwm= lsn_lwm;
+        lsn_lwm-= series[n_use_threads];
+      }
+      else if (n_use_threads > 1 && lsn_age_factor >= lsn_hwm)
+      {
+fewer_threads:
+        --n_use_threads;
+        lsn_lwm= lsn_hwm;
+        lsn_hwm+= series[n_use_threads];
+      }
+      else if (n_use_threads == 1 && lsn_age_factor >= 100 - safety_net)
+      {
+        wakeup= true;
+        break;
+      }
    }
+    else if (n_threads > n_use_threads &&
+             srv_max_purge_lag && m_history_length > srv_max_purge_lag)
+      goto more_threads;
    else if (n_use_threads > 1 && old_activity_count == srv_sys.activity_count)
-      --n_use_threads;
+      goto fewer_threads;
 
    ut_ad(n_use_threads);
    ut_ad(n_use_threads <= n_threads);
@@ -1795,11 +1835,68 @@ loop:
  m_running= 0;
 }
 
+inline void purge_coordinator_state::compute_series()
+{
+  ulint points= n_threads;
+  memset(series, 0, sizeof series);
+  constexpr ulint spread= 100 - adaptive_purge_threshold - safety_net;
+
+  /* We distribute spread across n_threads,
+  e.g.: spread of 60 is distributed across n_threads=4 as: 6+12+18+24 */
+
+  const ulint additional_points= (points * (points + 1)) / 2;
+  if (spread % additional_points == 0)
+  {
+    /* Arithmetic progression is possible. */
+    const ulint delta= spread / additional_points;
+    ulint growth= delta;
+    do
+    {
+      series[points--]= growth;
+      growth += delta;
+    }
+    while (points);
+    return;
+  }
+
+  /* Use average distribution to spread across the points */
+  const ulint delta= spread / points;
+  ulint total= 0;
+  do
+  {
+    series[points--]= delta;
+    total+= delta;
+  }
+  while (points);
+
+  for (points= 1; points <= n_threads && total++ < spread; )
+    series[points++]++;
+}
+
 inline void purge_coordinator_state::lazy_init()
 {
  if (n_threads)
    return;
  n_threads= n_use_threads= srv_n_purge_threads;
+  refresh(true);
+  start_time= my_interval_timer();
+}
+
+void purge_coordinator_state::refresh(bool full)
+{
+  if (full)
+  {
+    compute_series();
+    lsn_lwm= adaptive_purge_threshold;
+    lsn_hwm= adaptive_purge_threshold + series[n_threads];
+  }
+
+  mysql_mutex_lock(&log_sys.mutex);
+  const lsn_t last= log_sys.last_checkpoint_lsn,
+    max_age= log_sys.max_checkpoint_age;
+  mysql_mutex_unlock(&log_sys.mutex);
+
+  lsn_age_factor= ((log_sys.get_lsn() - last) * 100) / max_age;
 }

From 689b8d060ac890dcf1071b34a68234b30005e9a1 Mon Sep 17 00:00:00 2001
From: Monty
Date: Mon, 13 Sep 2021 18:51:40 +0300
Subject: [PATCH 15/29] MDEV-23519 Protocol packet - "Original Name" info is showing alias name, instead of original name of the column

While refactoring temporary-table field creation, a mistake was made when
copying the column name for internal temporary tables. For internal
temporary tables we should use the original field name, not the item name
(= the alias).
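The name-selection rule the patch introduces boils down to a few lines; the
following standalone sketch is a simplification (the real code in
Item_field::create_tmp_field_from_item_field() operates on LEX_CSTRING and
Item_ref, not std::string):

#include <string>

// For an internal temporary table (modify_item == true) the original
// field name must be kept, so that the client metadata carries the right
// "org name". Otherwise the item name (i.e. the alias) wins, and a
// referencing Item_ref always contributes its own name.
static std::string pick_tmp_field_name(bool has_orig_item, bool modify_item,
                                       const std::string &orig_item_name,
                                       const std::string &item_name,
                                       const std::string &field_name)
{
  if (has_orig_item)
    return orig_item_name;
  return modify_item ? field_name : item_name;
}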
---
 client/mysql.cc              |  5 +++--
 mysql-test/main/alias.result | 19 +++++++++++++++++++
 mysql-test/main/alias.test   | 15 +++++++++++++++
 sql/sql_select.cc            | 20 +++++++++++++++-----
 4 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/client/mysql.cc b/client/mysql.cc
index b7a2d6c5e72..7b3f34b755f 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -3503,6 +3503,7 @@ print_field_types(MYSQL_RES *result)
  while ((field = mysql_fetch_field(result)))
  {
    tee_fprintf(PAGER, "Field %3u: `%s`\n"
+                "Org_field: `%s`\n"
                "Catalog: `%s`\n"
                "Database: `%s`\n"
                "Table: `%s`\n"
@@ -3514,8 +3515,8 @@ print_field_types(MYSQL_RES *result)
                "Decimals: %u\n"
                "Flags: %s\n\n",
                ++i,
-                field->name, field->catalog, field->db, field->table,
-                field->org_table, fieldtype2str(field->type),
+                field->name, field->org_name, field->catalog, field->db,
+                field->table, field->org_table, fieldtype2str(field->type),
                get_charset_name(field->charsetnr), field->charsetnr,
                field->length, field->max_length, field->decimals,
                fieldflags2str(field->flags));
diff --git a/mysql-test/main/alias.result b/mysql-test/main/alias.result
index defd44f2548..d8bb211539f 100644
--- a/mysql-test/main/alias.result
+++ b/mysql-test/main/alias.result
@@ -218,3 +218,22 @@ DELETE ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ
 ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 't1 WHERE 1=1' at line 1
 connection default;
 disconnect c1;
+#
+# MDEV-23519
+#
+create or replace table t1 (a int);
+create or replace table t2 (b int);
+insert into t1 values(1),(2);
+insert into t2 values(1),(2);
+select t1.a as a1 from t1 as t1,t2 order by t2.b,t1.a;
+Catalog	Database	Table	Table_alias	Column	Column_alias	Type	Length	Max length	Is_null	Flags	Decimals	Charsetnr
+def	test	t1	t1	a	a1	3	11	1	Y	32768	0	63
+a1
+1
+2
+1
+2
+drop table t1,t2;
+#
+# End of 10.4 tests
+#
diff --git a/mysql-test/main/alias.test b/mysql-test/main/alias.test
index c02ebe2f5ff..f0c4e13abfd 100644
--- a/mysql-test/main/alias.test
+++ b/mysql-test/main/alias.test
@@ -226,3 +226,18 @@ connection c1;
 DELETE
ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ
ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ
ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZROM t1 WHERE 1=1; connection default; disconnect c1; +--echo # +--echo # MDEV-23519 +--echo # +create or replace table t1 (a int); +create or replace table t2 (b int); +insert into t1 values(1),(2); +insert into t2 values(1),(2); +--enable_metadata +select t1.a as a1 from t1 as t1,t2 order by t2.b,t1.a; +--disable_metadata +drop table t1,t2; + +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 77832a657ff..1eb0a492ecd 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -17972,7 +17972,14 @@ Field *Item::create_field_for_schema(THD *thd, TABLE *table) /** Create a temporary field for Item_field 
(or its descendant), either direct or referenced by an Item_ref. + + param->modify_item is set when we create a field for an internal temporary + table. In this case we have to ensure the new field name is identical to + the original field name as the field info will be sent to the client. + In other cases, the field name is set from orig_item, or from name if + orig_item is not set. */ + Field * Item_field::create_tmp_field_from_item_field(TABLE *new_table, Item_ref *orig_item, @@ -17980,6 +17987,10 @@ Item_field::create_tmp_field_from_item_field(TABLE *new_table, { DBUG_ASSERT(!is_result_field()); Field *result; + LEX_CSTRING *new_name= (orig_item ? &orig_item->name : + !param->modify_item() ? &name : + &field->field_name); + /* If item have to be able to store NULLs but underlaid field can't do it, create_tmp_field_from_field() can't be used for tmp field creation. @@ -17998,26 +18009,25 @@ Item_field::create_tmp_field_from_item_field(TABLE *new_table, Record_addr rec(orig_item ? orig_item->maybe_null : maybe_null); const Type_handler *handler= type_handler()-> type_handler_for_tmp_table(this); - result= handler->make_and_init_table_field(orig_item ? &orig_item->name : &name, + result= handler->make_and_init_table_field(new_name, rec, *this, new_table); } else if (param->table_cant_handle_bit_fields() && field->type() == MYSQL_TYPE_BIT) { const Type_handler *handler= type_handler_long_or_longlong(); - result= handler->make_and_init_table_field(&name, + result= handler->make_and_init_table_field(new_name, Record_addr(maybe_null), *this, new_table); } else { - LEX_CSTRING *tmp= orig_item ? &orig_item->name : &name; bool tmp_maybe_null= param->modify_item() ? maybe_null : field->maybe_null(); result= field->create_tmp_field(new_table->in_use->mem_root, new_table, tmp_maybe_null); - if (result) - result->field_name= *tmp; + if (result && !
param->modify_item()) + result->field_name= *new_name; } if (result && param->modify_item()) result_field= result; From 6be0ddae5edb080219226525c4423300f063081d Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 15 Sep 2021 19:16:37 +0300 Subject: [PATCH 16/29] Fixed compiler warnings in CONNECT --- storage/connect/json.cpp | 5 ++++- storage/connect/tabrest.h | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/storage/connect/json.cpp b/storage/connect/json.cpp index b9f9492320b..17c6ba9791a 100644 --- a/storage/connect/json.cpp +++ b/storage/connect/json.cpp @@ -1051,7 +1051,7 @@ int JOBJECT::GetSize(bool b) { for (PJPR jpp = First; jpp; jpp = jpp->Next) // If b return only non null pairs - if (!b || jpp->Val && !jpp->Val->IsNull()) + if (!b || (jpp->Val && !jpp->Val->IsNull())) n++; return n; @@ -1581,10 +1581,12 @@ PVAL JVALUE::GetValue(PGLOBAL g) PVAL valp = NULL; if (DataType != TYPE_JSON) + { if (DataType == TYPE_STRG) valp = AllocateValue(g, Strp, DataType, Nd); else valp = AllocateValue(g, &LLn, DataType, Nd); + } return valp; } // end of GetValue @@ -1755,6 +1757,7 @@ void JVALUE::SetValue(PGLOBAL g, PVAL valp) case TYPE_TINY: B = valp->GetTinyValue() != 0; DataType = TYPE_BOOL; + break; case TYPE_INT: N = valp->GetIntValue(); DataType = TYPE_INTG; diff --git a/storage/connect/tabrest.h b/storage/connect/tabrest.h index 9066a89b306..901d9102e95 100644 --- a/storage/connect/tabrest.h +++ b/storage/connect/tabrest.h @@ -6,9 +6,7 @@ #pragma once #if defined(_WIN32) -static PCSZ slash = "\\"; #else // !_WIN32 -static PCSZ slash = "/"; #define stricmp strcasecmp #endif // !_WIN32 From f03fee06b0f4bdb1d754d987db3375cade02445e Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 15 Sep 2021 19:18:11 +0300 Subject: [PATCH 17/29] Improve error messages from Aria - Error on commit now returns HA_ERR_COMMIT_ERROR instead of HA_ERR_INTERNAL_ERROR - If checkpoint fails, it will now print out where it failed. 
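For illustration, the checkpoint part of this patch records, at each failure site, which step failed and the errno observed there, then reports both once at a shared error label. Below is a minimal standalone sketch of that pattern; the step functions are hypothetical stand-ins, not the actual Aria routines.

#include <cstdio>

// Hypothetical stand-ins for the real checkpoint steps such as
// trnman_collect_transactions() or collect_tables(); each returns 0 on
// success or an errno-style code on failure.
static int collect_transactions_step() { return 0; }
static int collect_tables_step()       { return 5; } // simulate an I/O error
static int write_record_step()         { return 0; }

static int execute_checkpoint_sketch()
{
  const char *error_place= nullptr;  // which step failed
  int error_errno= 0;                // error code captured at the failure site

  if ((error_errno= collect_transactions_step()))
  { error_place= "collect_transactions"; goto err; }
  if ((error_errno= collect_tables_step()))
  { error_place= "collect_tables"; goto err; }
  if ((error_errno= write_record_step()))
  { error_place= "write_record"; goto err; }
  return 0;

err:
  // A single reporting site that can now say where the checkpoint failed
  std::fprintf(stderr, "checkpoint failed at %s with error %d\n",
               error_place, error_errno);
  return 1;
}

int main() { return execute_checkpoint_sketch(); }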
--- include/handler_ername.h | 1 + include/my_base.h | 3 ++- include/my_handler_errors.h | 3 ++- sql/handler.cc | 3 +++ storage/maria/ha_maria.cc | 9 +++++---- storage/maria/ma_checkpoint.c | 20 +++++++++++++++++++- 6 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/handler_ername.h b/include/handler_ername.h index fe55062e6fb..c9d09717a14 100644 --- a/include/handler_ername.h +++ b/include/handler_ername.h @@ -79,3 +79,4 @@ { "HA_ERR_ABORTED_BY_USER", HA_ERR_ABORTED_BY_USER, "" }, { "HA_ERR_DISK_FULL", HA_ERR_DISK_FULL, "" }, { "HA_ERR_INCOMPATIBLE_DEFINITION", HA_ERR_INCOMPATIBLE_DEFINITION, "" }, +{ "HA_ERR_COMMIT_ERROR", HA_ERR_COMMIT_ERROR, "" }, diff --git a/include/my_base.h b/include/my_base.h index 6a9a14fa91e..b4e7c1c7707 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -525,7 +525,8 @@ enum ha_base_keytype { #define HA_ERR_TABLESPACE_MISSING 194 /* Missing Tablespace */ #define HA_ERR_SEQUENCE_INVALID_DATA 195 #define HA_ERR_SEQUENCE_RUN_OUT 196 -#define HA_ERR_LAST 196 /* Copy of last error nr * */ +#define HA_ERR_COMMIT_ERROR 197 +#define HA_ERR_LAST 197 /* Copy of last error nr * */ /* Number of different errors */ #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) diff --git a/include/my_handler_errors.h b/include/my_handler_errors.h index faf7b9f459e..69b1566557d 100644 --- a/include/my_handler_errors.h +++ b/include/my_handler_errors.h @@ -107,7 +107,8 @@ static const char *handler_error_messages[]= "Foreign key cascade delete/update exceeds max depth", "Tablespace is missing for a table", "Sequence has been run out", - "Sequence values are conflicting" + "Sequence values are conflicting", + "Error during commit" }; #endif /* MYSYS_MY_HANDLER_ERRORS_INCLUDED */ diff --git a/sql/handler.cc b/sql/handler.cc index 9cf270f9cf3..d49a695f197 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -4217,6 +4217,9 @@ void handler::print_error(int error, myf errflag) case HA_ERR_TABLE_IN_FK_CHECK: textno= ER_TABLE_IN_FK_CHECK; break; + case HA_ERR_COMMIT_ERROR: + textno= ER_ERROR_DURING_COMMIT; + break; default: { /* The error was "unknown" to this function. 
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index b93039190b0..79bebc90837 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2904,7 +2904,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) if (file->autocommit) { if (ma_commit(trn)) - result= HA_ERR_INTERNAL_ERROR; + result= HA_ERR_COMMIT_ERROR; thd_set_ha_data(thd, maria_hton, 0); } } @@ -3043,7 +3043,7 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn) error= 0; if (unlikely(ma_commit(trn))) - error= 1; + error= HA_ERR_COMMIT_ERROR; if (!new_trn) { reset_thd_trn(thd, used_tables); @@ -3480,7 +3480,7 @@ static int maria_commit(handlerton *hton __attribute__ ((unused)), THD *thd, bool all) { TRN *trn= THD_TRN; - int res; + int res= 0; MARIA_HA *used_instances; DBUG_ENTER("maria_commit"); @@ -3499,7 +3499,8 @@ static int maria_commit(handlerton *hton __attribute__ ((unused)), trnman_reset_locked_tables(trn, 0); trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED); trn->used_instances= 0; - res= ma_commit(trn); + if (ma_commit(trn)) + res= HA_ERR_COMMIT_ERROR; reset_thd_trn(thd, used_instances); thd_set_ha_data(thd, maria_hton, 0); DBUG_RETURN(res); diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 3af808478e4..2741f54d7d7 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -153,8 +153,10 @@ end: static int really_execute_checkpoint(void) { uint i, error= 0; + int error_errno= 0; /** @brief checkpoint_start_log_horizon will be stored there */ char *ptr; + const char *error_place= 0; LEX_STRING record_pieces[4]; /**< only malloc-ed pieces */ LSN min_page_rec_lsn, min_trn_rec_lsn, min_first_undo_lsn; TRANSLOG_ADDRESS checkpoint_start_log_horizon; @@ -191,13 +193,19 @@ static int really_execute_checkpoint(void) &record_pieces[1], &min_trn_rec_lsn, &min_first_undo_lsn))) + { + error_place= "trnman_collect_transaction"; goto err; + } /* STEP 3: fetch information about table files */ if (unlikely(collect_tables(&record_pieces[2], checkpoint_start_log_horizon))) + { + error_place= "collect_tables"; goto err; + } /* STEP 4: fetch information about dirty pages */ @@ -211,7 +219,10 @@ static int really_execute_checkpoint(void) if (unlikely(pagecache_collect_changed_blocks_with_lsn(maria_pagecache, &record_pieces[3], &min_page_rec_lsn))) + { + error_place= "collect_pages"; goto err; + } /* LAST STEP: now write the checkpoint log record */ @@ -240,7 +251,10 @@ static int really_execute_checkpoint(void) sizeof(log_array)/sizeof(log_array[0]), log_array, NULL, NULL) || translog_flush(lsn))) + { + error_place= "translog_write_record"; goto err; + } translog_lock(); /* This cannot be done as a inwrite_rec_hook of LOGREC_CHECKPOINT, because @@ -251,6 +265,8 @@ static int really_execute_checkpoint(void) max_trid_in_control_file, recovery_failures))) { + error_place= "ma_control_file_write"; + error_errno= my_errno; translog_unlock(); goto err; } @@ -287,7 +303,9 @@ static int really_execute_checkpoint(void) err: error= 1; - ma_message_no_user(0, "checkpoint failed"); + my_printf_error(HA_ERR_GENERIC, "Aria engine: checkpoint failed at %s with " + "error %d", MYF(ME_ERROR_LOG), + error_place, (error_errno ? 
error_errno : my_errno)); /* we were possibly not able to determine what pages to flush */ pages_to_flush_before_next_checkpoint= 0; From 0d47945b58c0f12ca35f4803af9bfa8cc498f5cc Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 15 Sep 2021 21:21:03 +0300 Subject: [PATCH 18/29] Fixed bug in aria_chk that overwrote sort_buffer_length This bug happens when one runs aria_chk on multiple tables. It does not affect REPAIR TABLE. aria_chk tries to optimize the sort buffer size to minimize memory usage when used with small tables. The bug was that the adjusted value was used as a base for the next table, so the effective sort buffer could only shrink from one table to the next. --- include/myisamchk.h | 2 +- storage/maria/aria_chk.c | 4 ++-- storage/maria/ha_maria.cc | 8 ++++---- storage/maria/ma_check.c | 4 +++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/myisamchk.h b/include/myisamchk.h index f9a55ba467f..c494c672ec7 100644 --- a/include/myisamchk.h +++ b/include/myisamchk.h @@ -87,7 +87,7 @@ typedef struct st_handler_check_param /* Following is used to check if rows are visible */ ulonglong max_trid, max_found_trid; ulonglong not_visible_rows_found; - ulonglong sort_buffer_length; + ulonglong sort_buffer_length, orig_sort_buffer_length; ulonglong use_buffers; /* Used as param to getopt() */ size_t read_buffer_length, write_buffer_length, sort_key_blocks; time_t backup_time; /* To sign backup files */ diff --git a/storage/maria/aria_chk.c b/storage/maria/aria_chk.c index 7463adf34c1..30bb2cf0d96 100644 --- a/storage/maria/aria_chk.c +++ b/storage/maria/aria_chk.c @@ -434,8 +434,8 @@ static struct my_option my_long_options[] = ~0UL, (long) MALLOC_OVERHEAD, (long) 1L, 0}, { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "Size of sort buffer. Used by --recover", - &check_param.sort_buffer_length, - &check_param.sort_buffer_length, 0, GET_ULL, REQUIRED_ARG, + &check_param.orig_sort_buffer_length, + &check_param.orig_sort_buffer_length, 0, GET_ULL, REQUIRED_ARG, SORT_BUFFER_INIT, MIN_SORT_BUFFER, SIZE_T_MAX, MALLOC_OVERHEAD, 1L, 0}, { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "Internal buffer for sorting keys; Don't touch :)", diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 79bebc90837..80d3d669703 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -1481,7 +1481,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) param->testflag= ((check_opt->flags & ~(T_EXTEND)) | T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM | (check_opt->flags & T_EXTEND ?
T_REP : T_REP_BY_SORT)); - param->sort_buffer_length= THDVAR(thd, sort_buffer_size); + param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size); param->backup_time= check_opt->start_time; start_records= file->state->records; old_proc_info= thd_proc_info(thd, "Checking table"); @@ -1552,7 +1552,7 @@ int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt) param->thd= thd; param->op_name= "zerofill"; param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL; - param->sort_buffer_length= THDVAR(thd, sort_buffer_size); + param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size); param->db_name= table->s->db.str; param->table_name= table->alias.c_ptr(); @@ -1588,7 +1588,7 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) param->op_name= "optimize"; param->testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX); - param->sort_buffer_length= THDVAR(thd, sort_buffer_size); + param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size); thd_progress_init(thd, 1); if ((error= repair(thd, param, 1)) && param->retry_repair) { @@ -2056,7 +2056,7 @@ int ha_maria::enable_indexes(uint mode) } param->myf_rw &= ~MY_WAIT_IF_FULL; - param->sort_buffer_length= THDVAR(thd,sort_buffer_size); + param->orig_sort_buffer_length= THDVAR(thd,sort_buffer_size); param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method); param->tmpdir= &mysql_tmpdir_list; if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair) diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 358d563b63a..a244a9326ab 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -114,7 +114,7 @@ void maria_chk_init(HA_CHECK *param) param->use_buffers= PAGE_BUFFER_INIT; param->read_buffer_length=READ_BUFFER_INIT; param->write_buffer_length=READ_BUFFER_INIT; - param->sort_buffer_length=SORT_BUFFER_INIT; + param->orig_sort_buffer_length=SORT_BUFFER_INIT; param->sort_key_blocks=BUFFERS_WHEN_SORTING; param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL); @@ -2483,6 +2483,8 @@ static int initialize_variables_for_repair(HA_CHECK *param, tmp= (size_t) MY_MIN(sort_info->filelength, (my_off_t) (SIZE_T_MAX/10/threads)); tmp= MY_MAX(tmp * 8 * threads, (size_t) 65536); /* Some margin */ + param->sort_buffer_length= MY_MIN(param->orig_sort_buffer_length, + tmp); set_if_smaller(param->sort_buffer_length, tmp); /* Protect against too big sort buffer length */ #if SIZEOF_SIZE_T >= 8 From 5b0a76078a8ea38e8e19e3e2c49f0f7e091e2f72 Mon Sep 17 00:00:00 2001 From: Eugene Kosov Date: Thu, 16 Sep 2021 16:52:20 +0600 Subject: [PATCH 19/29] MDEV-26621 assertion failure "index->table->persistent_autoinc" in /storage/innobase/btr/btr0btr.cc during IMPORT dict_index_t::clear_instant_alter(): when searching for an AUTO_INCREMENT column, don't skip the beginning of the list, because the field can be at the beginning of the list --- .../innodb/r/instant_alter_import.result | 8 ++++++++ .../suite/innodb/t/instant_alter_import.test | 19 +++++++++++++++++++ storage/innobase/include/dict0mem.h | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/r/instant_alter_import.result b/mysql-test/suite/innodb/r/instant_alter_import.result index 2d9a39f7886..008a26ea6af 100644 --- a/mysql-test/suite/innodb/r/instant_alter_import.result +++ b/mysql-test/suite/innodb/r/instant_alter_import.result @@ -70,3 +70,11 @@ select * from t1; ERROR HY000: Tablespace has been discarded for
table `t1` drop table t2; drop table t1; +CREATE TABLE t1 (id INT PRIMARY KEY AUTO_INCREMENT, i1 INT) ENGINE=INNODB; +CREATE TABLE t2 (id INT PRIMARY KEY AUTO_INCREMENT, i1 INT, i2 INT) ENGINE=INNODB; +ALTER TABLE t2 DROP COLUMN i2, ALGORITHM=INSTANT; +ALTER TABLE t2 DISCARD TABLESPACE; +FLUSH TABLE t1 FOR EXPORT; +UNLOCK TABLES; +ALTER TABLE t2 IMPORT TABLESPACE; +DROP TABLE t2, t1; diff --git a/mysql-test/suite/innodb/t/instant_alter_import.test b/mysql-test/suite/innodb/t/instant_alter_import.test index fb187debb51..d01f4325f1e 100644 --- a/mysql-test/suite/innodb/t/instant_alter_import.test +++ b/mysql-test/suite/innodb/t/instant_alter_import.test @@ -82,3 +82,22 @@ select * from t1; drop table t2; drop table t1; + + +--let $MYSQLD_DATADIR= `SELECT @@datadir` + +CREATE TABLE t1 (id INT PRIMARY KEY AUTO_INCREMENT, i1 INT) ENGINE=INNODB; + +CREATE TABLE t2 (id INT PRIMARY KEY AUTO_INCREMENT, i1 INT, i2 INT) ENGINE=INNODB; +ALTER TABLE t2 DROP COLUMN i2, ALGORITHM=INSTANT; +ALTER TABLE t2 DISCARD TABLESPACE; + +FLUSH TABLE t1 FOR EXPORT; + +--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t2.ibd +--copy_file $MYSQLD_DATADIR/test/t1.cfg $MYSQLD_DATADIR/test/t2.cfg + +UNLOCK TABLES; +ALTER TABLE t2 IMPORT TABLESPACE; + +DROP TABLE t2, t1; diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index b0da5b4dc39..fe029fc9fcf 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2440,7 +2440,7 @@ inline void dict_index_t::clear_instant_alter() { return a.col->ind < b.col->ind; }); table->instant = NULL; if (ai_col) { - auto a = std::find_if(begin, end, + auto a = std::find_if(fields, end, [ai_col](const dict_field_t& f) { return f.col == ai_col; }); table->persistent_autoinc = (a == end) ? 0 : 1 + (a - fields); From 65cce297beea09afc8132efc91372aaee1c70f7c Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 16 Sep 2021 14:06:29 +0300 Subject: [PATCH 20/29] Updated rocksdb test result This was required as I added a new error code to my_base.h and rocksdb is adding its own errors after the last official one. Updated result file also for index_merge_rocksdb2. This is a big test, and we probably had not noticed before that some optimizer changes caused a difference. --- .../r/corrupted_data_reads_debug.result | 10 +++---- .../rocksdb/r/index_merge_rocksdb2.result | 8 +++--- .../rocksdb/r/tbl_opt_data_index_dir.result | 26 +++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result index 47f7bb923ba..01fa9dac7fd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result @@ -20,7 +20,7 @@ set @tmp1=@@rocksdb_verify_row_debug_checksums; set rocksdb_verify_row_debug_checksums=1; set session debug_dbug= "+d,myrocks_simulate_bad_row_read1"; select * from t1 where pk=1; -ERROR HY000: Got error 202 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 203 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read1"; set rocksdb_verify_row_debug_checksums=@tmp1; select * from t1 where pk=1; @@ -28,11 +28,11 @@ pk col1 1 1 set session debug_dbug= "+d,myrocks_simulate_bad_row_read2"; select * from t1 where pk=1; -ERROR HY000: Got error 202 'Found data corruption.'
from ROCKSDB +ERROR HY000: Got error 203 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read2"; set session debug_dbug= "+d,myrocks_simulate_bad_row_read3"; select * from t1 where pk=1; -ERROR HY000: Got error 202 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 203 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read3"; insert into t1 values(4,'0123456789'); select * from t1; @@ -56,7 +56,7 @@ pk col1 ABCD 1 set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1"; select * from t2; -ERROR HY000: Got error 202 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 203 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1"; drop table t2; create table t2 ( @@ -69,6 +69,6 @@ pk col1 ABCD 1 set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1"; select * from t2; -ERROR HY000: Got error 202 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 203 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1"; drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result index d96c40127a8..9de77014593 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result @@ -63,7 +63,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t0 ALL i1 NULL NULL NULL # Using where explain select * from t0 where (key1 < 3 or key2 <4) and key3 = 50; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where +1 SIMPLE t0 ref i1,i2,i3 i3 4 const # Using where explain select * from t0 use index (i1,i2) where (key1 < 2 or key2 <3) and key3 = 50; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where @@ -121,11 +121,11 @@ id select_type table type possible_keys key key_len ref rows Extra explain select * from t0 where (key1 < 3 or key2 < 3) and (key3 < 70); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where +1 SIMPLE t0 range i1,i2,i3 i3 4 NULL # Using index condition; Using where explain select * from t0 where (key1 < 3 or key2 < 3) and (key3 < 1000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where +1 SIMPLE t0 range i1,i2,i3 i3 4 NULL # Using index condition; Using where explain select * from t0 where ((key1 < 3 or key2 < 3) and (key2 <4 or key3 < 3)) or @@ -287,7 +287,7 @@ id select_type table type possible_keys key key_len ref rows Extra NULL UNION RESULT ALL NULL NULL NULL NULL NULL explain select * from (select * from t1 where key1 = 3 or key2 =3) as Z where key8 >5; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index_merge i1,i2,i8 i1,i2 4,4 NULL 4 Using union(i1,i2); Using where +1 SIMPLE t1 range i1,i2,i8 i8 4 NULL 2 Using index condition; Using where create table t3 like t0; insert into t3 select * from t0; alter table t3 add key9 int not null, add index i9(key9); diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result 
b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result index 95dae68b4e6..90f163b7d4b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result @@ -1,16 +1,16 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data'; -ERROR HY000: Can't create table `test`.`t1` (errno: 198 "Unknown error 198") -show warnings; -Level Code Message -Error 1005 Can't create table `test`.`t1` (errno: 198 "Unknown error 198") -Warning 1296 Got error 198 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB -CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index'; ERROR HY000: Can't create table `test`.`t1` (errno: 199 "Unknown error 199") show warnings; Level Code Message Error 1005 Can't create table `test`.`t1` (errno: 199 "Unknown error 199") -Warning 1296 Got error 199 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB +Warning 1296 Got error 199 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB +CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index'; +ERROR HY000: Can't create table `test`.`t1` (errno: 200 "Unknown error 200") +show warnings; +Level Code Message +Error 1005 Can't create table `test`.`t1` (errno: 200 "Unknown error 200") +Warning 1296 Got error 200 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id) ( PARTITION P0 VALUES LESS THAN (1000) @@ -19,11 +19,11 @@ PARTITION P1 VALUES LESS THAN (2000) DATA DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); -ERROR HY000: Can't create table `test`.`t1` (errno: 198 "Unknown error 198") +ERROR HY000: Can't create table `test`.`t1` (errno: 199 "Unknown error 199") show warnings; Level Code Message -Error 1005 Can't create table `test`.`t1` (errno: 198 "Unknown error 198") -Warning 1296 Got error 198 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB +Error 1005 Can't create table `test`.`t1` (errno: 199 "Unknown error 199") +Warning 1296 Got error 199 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory") CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id) ( @@ -33,9 +33,9 @@ PARTITION P1 VALUES LESS THAN (2000) INDEX DIRECTORY = '/foo/bar/data/', PARTITION P2 VALUES LESS THAN (MAXVALUE) ); -ERROR HY000: Can't create table `test`.`t1` (errno: 199 "Unknown error 199") +ERROR HY000: Can't create table `test`.`t1` (errno: 200 "Unknown error 200") show warnings; Level Code Message -Error 1005 Can't create table `test`.`t1` (errno: 199 "Unknown error 199") -Warning 1296 Got error 199 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB +Error 1005 Can't create table `test`.`t1` (errno: 200 "Unknown error 200") +Warning 1296 Got error 200 'Specifying INDEX DIRECTORY for an individual table is not supported.' 
from ROCKSDB Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory") From c430aa72abbdccb1ece7f0d65b49a6b48e7c5ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Sep 2021 20:10:42 +0300 Subject: [PATCH 21/29] MDEV-26626 InnoDB fails to advance the log checkpoint buf_flush_page_cleaner(): Always try to advance the log checkpoint, even when no pages were flushed during the latest batch. Maybe, since the previous batch, there was an LRU flush that removed the last dirty pages. Failure to advance the log checkpoint will cause unnecessary work in Mariabackup and on crash recovery. --- storage/innobase/buf/buf0flu.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 10a84d99a2e..fb687b4f51b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2266,6 +2266,15 @@ furious_flush: unemployed: buf_flush_async_lsn= 0; buf_pool.page_cleaner_set_idle(true); + + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;); + + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + + if (!recv_recovery_is_on() && srv_operation == SRV_OPERATION_NORMAL) + log_checkpoint(); + + mysql_mutex_lock(&buf_pool.flush_list_mutex); continue; } From 9d8e83b676325d9ee267bdee49d30e277851ff57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Sep 2021 20:30:08 +0300 Subject: [PATCH 22/29] MDEV-26356 fixup: Adjust innodb_max_purge_lag_wait innodb_max_purge_lag_wait_update(): To align with purge_coordinator_state::refresh(), we must trigger a page flush batch if the last log checkpoint is too old. This was caught in a hang of the test innodb_gis.rtree_compress. --- storage/innobase/handler/ha_innodb.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4887d258d25..e431e0499c4 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -228,6 +228,14 @@ static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *, { if (thd_kill_level(thd)) break; + /* Adjust for purge_coordinator_state::refresh() */ + mysql_mutex_lock(&log_sys.mutex); + const lsn_t last= log_sys.last_checkpoint_lsn, + max_age= log_sys.max_checkpoint_age; + mysql_mutex_unlock(&log_sys.mutex); + const lsn_t lsn= log_sys.get_lsn(); + if ((lsn - last) / 4 >= max_age / 5) + buf_flush_ahead(last + max_age / 5, false); srv_wake_purge_thread_if_not_active(); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } From 106b16a5af7a8e29889e0ef8930bd2901a9ad00e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 17 Sep 2021 07:01:01 +0300 Subject: [PATCH 23/29] MDEV-26356 fixup: integer type mismatch on 32-bit --- storage/innobase/srv/srv0srv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 59fa5ca8a12..bb7dc5709db 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1896,7 +1896,7 @@ void purge_coordinator_state::refresh(bool full) max_age= log_sys.max_checkpoint_age; mysql_mutex_unlock(&log_sys.mutex); - lsn_age_factor= ((log_sys.get_lsn() - last) * 100) / max_age; + lsn_age_factor= ulint(((log_sys.get_lsn() - last) * 100) / max_age); } From d3b35598fcf8c83c8e2c1a004d507c95a42eb174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 16 Sep 2021 12:14:39 
+0300 Subject: [PATCH 24/29] MDEV-26053 : TRUNCATE on table with Foreign Key Constraint no longer replicated to other nodes The problem was that there was an extra condition, !thd->lex->no_write_to_binlog, before the call to begin TOI. It seems that this variable is not initialized. TRUNCATE does not support the [NO_WRITE_TO_BINLOG | LOCAL] keywords, thus we should not check this condition. All this was hidden in a macro, so I decided to replace those macros, which were used in only a few places, with actual function calls. --- include/wsrep.h | 7 --- .../suite/galera/r/galera_fk_truncate.result | 47 +++++++++++++++++++ .../suite/galera/t/galera_fk_truncate.test | 39 +++++++++++++++ sql/sql_admin.cc | 15 +++--- sql/sql_truncate.cc | 12 ++--- sql/wsrep_mysqld.cc | 5 +- 6 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_fk_truncate.result create mode 100644 mysql-test/suite/galera/t/galera_fk_truncate.test diff --git a/include/wsrep.h b/include/wsrep.h index 5272b687732..2a9036956cc 100644 --- a/include/wsrep.h +++ b/include/wsrep.h @@ -23,8 +23,6 @@ #define DBUG_ASSERT_IF_WSREP(A) DBUG_ASSERT(A) #define WSREP_MYSQL_DB (char *)"mysql" -#define WSREP_TO_ISOLATION_BEGIN_IF(db_, table_, table_list_) \ - if (WSREP_ON && WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) #define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) \ if (WSREP_ON && WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) \ @@ -48,10 +46,6 @@ if (WSREP(thd) && !thd->lex->no_write_to_binlog \ && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto wsrep_error_label; -#define WSREP_TO_ISOLATION_BEGIN_FK_TABLES(db_, table_, table_list_, fk_tables) \ - if (WSREP(thd) && !thd->lex->no_write_to_binlog \ - && wsrep_to_isolation_begin(thd, db_, table_, table_list_, NULL, fk_tables)) - #define WSREP_DEBUG(...) \ if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__) #define WSREP_INFO(...) WSREP_LOG(sql_print_information, ##__VA_ARGS__) @@ -75,7 +69,6 @@ #define WSREP_ERROR(...)
#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) do { } while(0) #define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_) -#define WSREP_TO_ISOLATION_BEGIN_FK_TABLES(db_, table_, table_list_, fk_tables_) #define WSREP_TO_ISOLATION_END #define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) #define WSREP_SYNC_WAIT(thd_, before_) diff --git a/mysql-test/suite/galera/r/galera_fk_truncate.result b/mysql-test/suite/galera/r/galera_fk_truncate.result new file mode 100644 index 00000000000..a90f3e27ab7 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_fk_truncate.result @@ -0,0 +1,47 @@ +connection node_2; +connection node_1; +CREATE TABLE author ( +id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, +name VARCHAR(100) NOT NULL +) ENGINE = InnoDB; +CREATE TABLE book ( +id MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, +title VARCHAR(200) NOT NULL, +author_id SMALLINT UNSIGNED NOT NULL, +CONSTRAINT `fk_book_author` + FOREIGN KEY (author_id) REFERENCES author (id) +ON DELETE CASCADE +ON UPDATE RESTRICT +) ENGINE = InnoDB; +INSERT INTO author (name) VALUES ('Abdul Alhazred'); +INSERT INTO book (title, author_id) VALUES ('Necronomicon', LAST_INSERT_ID()); +TRUNCATE TABLE book; +SELECT * FROM author; +id name +1 Abdul Alhazred +SELECT * FROM book; +id title author_id +connection node_2; +SELECT * FROM author; +id name +1 Abdul Alhazred +SELECT * FROM book; +id title author_id +INSERT INTO author (name) VALUES ('Abdul Alhazred'); +INSERT INTO book (title, author_id) VALUES ('Necronomicon', LAST_INSERT_ID()); +TRUNCATE TABLE book; +SELECT * FROM author; +id name +1 Abdul Alhazred +2 Abdul Alhazred +SELECT * FROM book; +id title author_id +connection node_1; +TRUNCATE TABLE book; +SELECT * FROM author; +id name +1 Abdul Alhazred +2 Abdul Alhazred +SELECT * FROM book; +id title author_id +DROP TABLE book, author; diff --git a/mysql-test/suite/galera/t/galera_fk_truncate.test b/mysql-test/suite/galera/t/galera_fk_truncate.test new file mode 100644 index 00000000000..9d54a720432 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_fk_truncate.test @@ -0,0 +1,39 @@ +--source include/galera_cluster.inc + +CREATE TABLE author ( + id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(100) NOT NULL +) ENGINE = InnoDB; + +CREATE TABLE book ( + id MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + title VARCHAR(200) NOT NULL, + author_id SMALLINT UNSIGNED NOT NULL, + CONSTRAINT `fk_book_author` + FOREIGN KEY (author_id) REFERENCES author (id) + ON DELETE CASCADE + ON UPDATE RESTRICT +) ENGINE = InnoDB; + +INSERT INTO author (name) VALUES ('Abdul Alhazred'); +INSERT INTO book (title, author_id) VALUES ('Necronomicon', LAST_INSERT_ID()); + +TRUNCATE TABLE book; +SELECT * FROM author; +SELECT * FROM book; + +--connection node_2 +SELECT * FROM author; +SELECT * FROM book; +INSERT INTO author (name) VALUES ('Abdul Alhazred'); +INSERT INTO book (title, author_id) VALUES ('Necronomicon', LAST_INSERT_ID()); +TRUNCATE TABLE book; +SELECT * FROM author; +SELECT * FROM book; + +--connection node_1 +TRUNCATE TABLE book; +SELECT * FROM author; +SELECT * FROM book; + +DROP TABLE book, author; diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index b626200d297..f692afb1440 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -466,16 +466,17 @@ static bool wsrep_toi_replication(THD *thd, TABLE_LIST *tables) /* now TOI replication, with no locks held */ if (keys.empty()) { - WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, tables); - } else { - 
WSREP_TO_ISOLATION_BEGIN_FK_TABLES(NULL, NULL, tables, &keys) { + if (!thd->lex->no_write_to_binlog && + wsrep_to_isolation_begin(thd, NULL, NULL, tables)) + return true; + } + else + { + if (!thd->lex->no_write_to_binlog && + wsrep_to_isolation_begin(thd, NULL, NULL, tables, NULL, &keys)) return true; - } } return false; - - wsrep_error_label: - return true; } #endif /* WITH_WSREP */ diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index aca43021798..62f6f8a3f90 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -424,15 +424,13 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { if (keys.empty()) { - WSREP_TO_ISOLATION_BEGIN_IF(table_ref->db.str, table_ref->table_name.str, NULL) - { + if (wsrep_to_isolation_begin(thd, table_ref->db.str, table_ref->table_name.str, NULL)) DBUG_RETURN(TRUE); - } - } else { - WSREP_TO_ISOLATION_BEGIN_FK_TABLES(NULL, NULL, table_ref, &keys) - { + } + else + { + if (wsrep_to_isolation_begin(thd, NULL, NULL, table_ref, NULL, &keys)) DBUG_RETURN(TRUE); - } } } } diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index bf1e4e32b49..55564dbe235 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2164,8 +2164,9 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, { /* No isolation for applier or replaying threads. - */ - if (!wsrep_thd_is_local(thd)) return 0; + */ + if (!wsrep_thd_is_local(thd)) + return 0; int ret= 0; mysql_mutex_lock(&thd->LOCK_thd_data); From a2e55131c64bee63b980e78cec92cf9c4bf1b7ec Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Thu, 16 Sep 2021 16:26:59 +0200 Subject: [PATCH 25/29] MDEV-19950 addendum: galera_ssl_upgrade removed from the list of disabled tests and adapted for 10.4+ --- mysql-test/suite/galera/disabled.def | 1 - mysql-test/suite/galera/r/galera_ssl_upgrade.result | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index 590a8623223..d742879bb10 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -31,7 +31,6 @@ galera_parallel_simple : MDEV-20318 galera.galera_parallel_simple fails galera_pc_ignore_sb : MDEV-20888 galera.galera_pc_ignore_sb galera_pc_recovery : MDEV-25199 cluster fails to start up galera_shutdown_nonprim : MDEV-21493 galera.galera_shutdown_nonprim -galera_ssl_upgrade : MDEV-19950 Galera test failure on galera_ssl_upgrade galera_toi_ddl_nonconflicting : MDEV-21518 galera.galera_toi_ddl_nonconflicting galera_toi_truncate : MDEV-22996 Hang on galera_toi_truncate test case galera_var_ignore_apply_errors : MDEV-20451: Lock wait timeout exceeded in galera_var_ignore_apply_errors diff --git a/mysql-test/suite/galera/r/galera_ssl_upgrade.result b/mysql-test/suite/galera/r/galera_ssl_upgrade.result index 1443e34d041..134d1d1b605 100644 --- a/mysql-test/suite/galera/r/galera_ssl_upgrade.result +++ b/mysql-test/suite/galera/r/galera_ssl_upgrade.result @@ -1,3 +1,5 @@ +connection node_2; +connection node_1; connection node_1; call mtr.add_suppression("WSREP: write_handler(): protocol is shutdown.*"); connection node_2; @@ -24,5 +26,6 @@ connection node_1; SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; VARIABLE_VALUE = 2 1 +connection node_2; disconnect node_2; disconnect node_1; From 48bbc447335a0b3ec698e975d48ad49145780624 Mon Sep 17 00:00:00 2001 From: Krunal Bauskar Date: Wed, 15 Sep 2021 16:18:39 +0800 Subject: [PATCH 26/29] 
MDEV-26609 : Avoid deriving ELEMENT_PER_LATCH from cacheline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * The buffer pool has latches that protect access to pages. * There is one latch per N pages (check page_hash_table for more details). * N is calculated based on the cacheline size. * For example, if the cacheline size is 64, then 7 page pointers + 1 latch can be hosted on the same cacheline; if it is 128, then 15 page pointers + 1 latch can be hosted on the same cacheline. * ARM generally has a wider cacheline, so on ARM 1 latch is used to access 15 pages vs. 7 pages with 1 latch on x86. Naturally, contention is higher in the ARM case. * This patch helps relax that contention by limiting the elements per latch to 7 (+ 1 latch slot). For wider cachelines (say 128), the remaining 8 slots are kept empty. This ensures that no two latches land on the same cacheline, avoiding latch-level contention. Based on a suggestion from Marko, the same logic is now extended to lock_sys_t::hash_table. --- storage/innobase/include/buf0buf.h | 16 +++++++++++++--- storage/innobase/include/lock0lock.h | 13 ++++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 2ad731e7b94..3f922708ef1 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1726,10 +1726,15 @@ public: /** Hash table with singly-linked overflow lists. @see hash_table_t */ struct page_hash_table { + static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes"); + static_assert(!(CPU_LEVEL1_DCACHE_LINESIZE & 63), + "not a multiple of 64 bytes"); + /** Number of array[] elements per page_hash_latch. Must be one less than a power of 2. */ - static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE / - sizeof(void*) - 1; + static constexpr size_t ELEMENTS_PER_LATCH= 64 / sizeof(void*) - 1; + static constexpr size_t EMPTY_SLOTS_PER_LATCH= + ((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*)); /** number of payload elements in array[] */ Atomic_relaxed n_cells; @@ -1746,7 +1751,12 @@ public: /** @return the index of an array element */ ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); } /** @return raw array index converted to padded index */ - static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; } + static ulint pad(ulint h) + { + ulint latches= h / ELEMENTS_PER_LATCH; + ulint empty_slots= latches * EMPTY_SLOTS_PER_LATCH; + return 1 + latches + empty_slots + h; + } private: /** @return the hash value before any ELEMENTS_PER_LATCH padding */ static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); } diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 859441afcc0..c2fef3baaac 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -609,8 +609,9 @@ public: /** Number of array[] elements per hash_latch. Must be LATCH less than a power of 2. */ - static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE / - sizeof(void*) - LATCH; + static constexpr size_t ELEMENTS_PER_LATCH= (64 / sizeof(void*)) - LATCH; + static constexpr size_t EMPTY_SLOTS_PER_LATCH= + ((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*)); /** number of payload elements in array[]. Protected by lock_sys.latch.
*/ ulint n_cells; @@ -632,9 +633,15 @@ public: /** @return the index of an array element */ inline ulint calc_hash(ulint fold) const; + /** @return raw array index converted to padded index */ static ulint pad(ulint h) - { return LATCH + LATCH * (h / ELEMENTS_PER_LATCH) + h; } + { + ulint latches= LATCH * (h / ELEMENTS_PER_LATCH); + ulint empty_slots= (h / ELEMENTS_PER_LATCH) * EMPTY_SLOTS_PER_LATCH; + return LATCH + latches + empty_slots + h; + } + /** Get a latch. */ static hash_latch *latch(hash_cell_t *cell) { From 1a4a7dddb6505352a7c3b4a2e1583ed92cad1dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Sep 2021 12:50:55 +0300 Subject: [PATCH 27/29] Cleanup: Make btr_root_block_get() more robust btr_root_block_get(): Check for index->page == FIL_NULL. btr_root_get(): Declare static. Other callers can invoke btr_root_block_get() directly. btr_get_size(): Remove conditions that are checked in btr_root_block_get(). --- storage/innobase/btr/btr0btr.cc | 9 ++++----- storage/innobase/btr/btr0cur.cc | 6 +++--- storage/innobase/include/btr0btr.h | 10 ---------- storage/innobase/row/row0purge.cc | 4 ++-- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index f786da84b17..56e84e47fda 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -219,7 +219,7 @@ btr_root_block_get( or RW_X_LATCH */ mtr_t* mtr) /*!< in: mtr */ { - if (!index->table || !index->table->space) { + if (!index->table || !index->table->space || index->page == FIL_NULL) { return NULL; } @@ -259,6 +259,7 @@ btr_root_block_get( /**************************************************************//** Gets the root node of a tree and sx-latches it for segment access. @return root page, sx-latched */ +static page_t* btr_root_get( /*=========*/ @@ -578,10 +579,8 @@ btr_get_size( || mtr->memo_contains(index->lock, MTR_MEMO_S_LOCK)); ut_ad(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); - if (index->page == FIL_NULL - || dict_index_is_online_ddl(index) - || !index->is_committed() - || !index->table->space) { + if (dict_index_is_online_ddl(index) + || !index->is_committed()) { return(ULINT_UNDEFINED); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index e533f93b199..adba676808c 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -411,15 +411,15 @@ unreadable: return DB_CORRUPTION; } - page_t* root = btr_root_get(index, mtr); + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - if (!root || btr_cur_instant_root_init(index, root)) { + if (!root || btr_cur_instant_root_init(index, root->frame)) { goto unreadable; } ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES); - if (fil_page_get_type(root) == FIL_PAGE_INDEX) { + if (fil_page_get_type(root->frame) == FIL_PAGE_INDEX) { ut_ad(!index->is_instant()); return DB_SUCCESS; } diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 65608552986..4543fc21e8a 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -187,16 +187,6 @@ void btr_corruption_report(const buf_block_t* block,const dict_index_t* index); != index->table->not_redundant()) \ btr_corruption_report(block, index) -/**************************************************************//** -Gets the root node of a tree and sx-latches it for segment access. 
-@return root page, sx-latched */ -page_t* -btr_root_get( -/*=========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); - /**************************************************************//** Checks and adjusts the root node of a tree during IMPORT TABLESPACE. @return error code, or DB_SUCCESS */ diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index daaba10bc5a..010ec224460 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -914,7 +914,7 @@ skip_secondaries: index->set_modified(mtr); - /* NOTE: we must also acquire an X-latch to the + /* NOTE: we must also acquire a U latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, @@ -923,7 +923,7 @@ skip_secondaries: latching order if we would only later latch the root page of such a tree! */ - btr_root_get(index, &mtr); + btr_root_block_get(index, RW_SX_LATCH, &mtr); block = buf_page_get( page_id_t(rseg.space->id, page_no), From 1e9c922fa726b22f4522f2a4de0fcb6595404086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 17 Sep 2021 09:14:20 +0300 Subject: [PATCH 28/29] MDEV-26623 Possible race condition between statistics and bulk insert When computing statistics, let us play it safe and check whether an insert into an empty table is in progress, once we have acquired the root page latch. If yes, let us pretend that the table is empty, just like MVCC reads would do. It is unlikely that this race condition could lead to any crashes before MDEV-24621, because the index tree structure should be protected by the page latches. But, at least we can avoid some busy work and return earlier. As part of this, some code that is only used for statistics calculation is being moved into static functions in that compilation unit. --- storage/innobase/btr/btr0btr.cc | 74 ----- storage/innobase/btr/btr0cur.cc | 344 -------------------- storage/innobase/dict/dict0stats.cc | 467 +++++++++++++++++++++++++--- storage/innobase/include/btr0btr.h | 24 -- storage/innobase/include/btr0cur.h | 31 -- 5 files changed, 426 insertions(+), 514 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 56e84e47fda..02aa89361c5 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -273,38 +273,6 @@ btr_root_get( return(root ? buf_block_get_frame(root) : NULL); } -/**************************************************************//** -Gets the height of the B-tree (the level of the root, when the leaf -level is assumed to be 0). The caller must hold an S or X latch on -the index. -@return tree height (level of the root) */ -ulint -btr_height_get( -/*===========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint height=0; - buf_block_t* root_block; - - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - - /* S latches the page */ - root_block = btr_root_block_get(index, RW_S_LATCH, mtr); - - if (root_block) { - height = btr_page_get_level(buf_block_get_frame(root_block)); - - /* Release the S latch on the root page.
*/ - mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX); - } - - return(height); -} - /**************************************************************//** Checks a file segment header within a B-tree root page and updates the segment header space id. @@ -562,48 +530,6 @@ btr_page_alloc( index, hint_page_no, file_direction, level, mtr, init_mtr); } -/**************************************************************//** -Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -ulint -btr_get_size( -/*=========*/ - const dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ -{ - ulint n=0; - - ut_ad(srv_read_only_mode - || mtr->memo_contains(index->lock, MTR_MEMO_S_LOCK)); - ut_ad(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); - - if (dict_index_is_online_ddl(index) - || !index->is_committed()) { - return(ULINT_UNDEFINED); - } - - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - if (!root) { - return ULINT_UNDEFINED; - } - mtr->x_lock_space(index->table->space); - if (flag == BTR_N_LEAF_PAGES) { - fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &n, mtr); - } else { - ulint dummy; - n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP - + root->frame, &dummy, mtr); - n += fseg_n_reserved_pages(*root, - PAGE_HEADER + PAGE_BTR_SEG_LEAF - + root->frame, &dummy, mtr); - } - - return(n); -} - /**************************************************************//** Frees a page used in an ibuf tree. Puts the page to the free list of the ibuf tree. */ diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index adba676808c..32bdf9a8a51 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -137,17 +137,6 @@ can be released by page reorganize, then it is reorganized */ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ -/** Estimated table level stats from sampled value. -@param value sampled stats -@param index index being sampled -@param sample number of sampled rows -@param ext_size external stored data size -@param not_empty table not empty -@return estimated table wide stats from sampled value */ -#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \ - (((value) * static_cast(index->stat_n_leaf_pages) \ - + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) - /* @} */ /*******************************************************************//** @@ -6552,339 +6541,6 @@ btr_estimate_n_rows_in_range( index, tuple1, tuple2, 1); } -/*******************************************************************//** -Record the number of non_null key values in a given index for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are eventually stored in the array: -index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. 
*/ -static -void -btr_record_not_null_field_in_rec( -/*=============================*/ - ulint n_unique, /*!< in: dict_index_get_n_unique(index), - number of columns uniquely determine - an index entry */ - const rec_offs* offsets, /*!< in: rec_get_offsets(rec, index), - its size could be for all fields or - that of "n_unique" */ - ib_uint64_t* n_not_null) /*!< in/out: array to record number of - not null rows for n-column prefix */ -{ - ulint i; - - ut_ad(rec_offs_n_fields(offsets) >= n_unique); - - if (n_not_null == NULL) { - return; - } - - for (i = 0; i < n_unique; i++) { - if (rec_offs_nth_sql_null(offsets, i)) { - break; - } - - n_not_null[i]++; - } -} - -/** Estimates the number of different key values in a given index, for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed -0..n_uniq-1) and the number of pages that were sampled is saved in -result.n_sample_sizes[]. -If innodb_stats_method is nulls_ignored, we also record the number of -non-null values for each prefix and stored the estimates in -array result.n_non_null_key_vals. -@param[in] index index -@return vector with statistics information -empty vector if the index is unavailable. */ -std::vector -btr_estimate_number_of_different_key_vals(dict_index_t* index) -{ - btr_cur_t cursor; - page_t* page; - rec_t* rec; - ulint n_cols; - ib_uint64_t* n_diff; - ib_uint64_t* n_not_null; - ibool stats_null_not_equal; - uintmax_t n_sample_pages=1; /* number of pages to sample */ - ulint not_empty_flag = 0; - ulint total_external_size = 0; - ulint i; - ulint j; - uintmax_t add_on; - mtr_t mtr; - mem_heap_t* heap = NULL; - rec_offs* offsets_rec = NULL; - rec_offs* offsets_next_rec = NULL; - - std::vector result; - - /* For spatial index, there is no such stats can be - fetched. */ - ut_ad(!dict_index_is_spatial(index)); - - n_cols = dict_index_get_n_unique(index); - - heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) - * n_cols - + dict_index_get_n_fields(index) - * (sizeof *offsets_rec - + sizeof *offsets_next_rec)); - - n_diff = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof(n_diff[0])); - - n_not_null = NULL; - - /* Check srv_innodb_stats_method setting, and decide whether we - need to record non-null value and also decide if NULL is - considered equal (by setting stats_null_not_equal value) */ - switch (srv_innodb_stats_method) { - case SRV_STATS_NULLS_IGNORED: - n_not_null = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof *n_not_null); - /* fall through */ - - case SRV_STATS_NULLS_UNEQUAL: - /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL - case, we will treat NULLs as unequal value */ - stats_null_not_equal = TRUE; - break; - - case SRV_STATS_NULLS_EQUAL: - stats_null_not_equal = FALSE; - break; - - default: - ut_error; - } - - if (srv_stats_sample_traditional) { - /* It makes no sense to test more pages than are contained - in the index, thus we lower the number if it is too high */ - if (srv_stats_transient_sample_pages > index->stat_index_size) { - if (index->stat_index_size > 0) { - n_sample_pages = index->stat_index_size; - } - } else { - n_sample_pages = srv_stats_transient_sample_pages; - } - } else { - /* New logaritmic number of pages that are estimated. - Number of pages estimated should be between 1 and - index->stat_index_size. - - If we have only 0 or 1 index pages then we can only take 1 - sample. We have already initialized n_sample_pages to 1. 
- - So taking index size as I and sample as S and log(I)*S as L - - requirement 1) we want the out limit of the expression to not exceed I; - requirement 2) we want the ideal pages to be at least S; - so the current expression is min(I, max( min(S,I), L) - - looking for simplifications: - - case 1: assume S < I - min(I, max( min(S,I), L) -> min(I , max( S, L)) - - but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. - - so we have: min(I , L) - - case 2: assume I < S - min(I, max( min(S,I), L) -> min(I, max( I, L)) - - case 2a: L > I - min(I, max( I, L)) -> min(I, L) -> I - - case 2b: when L < I - min(I, max( I, L)) -> min(I, I ) -> I - - so taking all case2 paths is I, our expression is: - n_pages = S < I? min(I,L) : I - */ - if (index->stat_index_size > 1) { - n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) - ? ut_min(index->stat_index_size, - static_cast( - log2(double(index->stat_index_size)) - * double(srv_stats_transient_sample_pages))) - : index->stat_index_size; - } - } - - /* Sanity check */ - ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size)); - - /* We sample some pages in the index to get an estimate */ - - for (i = 0; i < n_sample_pages; i++) { - mtr_start(&mtr); - - bool available; - - available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, - &cursor, &mtr); - - if (!available) { - mtr_commit(&mtr); - mem_heap_free(heap); - - return result; - } - - /* Count the number of different key values for each prefix of - the key on this index page. If the prefix does not determine - the index record uniquely in the B-tree, then we subtract one - because otherwise our algorithm would give a wrong estimate - for an index where there is just one key value. */ - - if (!index->is_readable()) { - mtr_commit(&mtr); - goto exit_loop; - } - - page = btr_cur_get_page(&cursor); - - rec = page_rec_get_next(page_get_infimum_rec(page)); - const ulint n_core = page_is_leaf(page) - ? index->n_core_fields : 0; - - if (!page_rec_is_supremum(rec)) { - not_empty_flag = 1; - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - n_core, - ULINT_UNDEFINED, &heap); - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_rec, n_not_null); - } - } - - while (!page_rec_is_supremum(rec)) { - ulint matched_fields; - rec_t* next_rec = page_rec_get_next(rec); - if (page_rec_is_supremum(next_rec)) { - total_external_size += - btr_rec_get_externally_stored_len( - rec, offsets_rec); - break; - } - - offsets_next_rec = rec_get_offsets(next_rec, index, - offsets_next_rec, - n_core, - ULINT_UNDEFINED, - &heap); - - cmp_rec_rec(rec, next_rec, - offsets_rec, offsets_next_rec, - index, stats_null_not_equal, - &matched_fields); - - for (j = matched_fields; j < n_cols; j++) { - /* We add one if this index record has - a different prefix from the previous */ - - n_diff[j]++; - } - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_next_rec, n_not_null); - } - - total_external_size - += btr_rec_get_externally_stored_len( - rec, offsets_rec); - - rec = next_rec; - /* Initialize offsets_rec for the next round - and assign the old offsets_rec buffer to - offsets_next_rec. 
-            {
-                rec_offs* offsets_tmp = offsets_rec;
-                offsets_rec = offsets_next_rec;
-                offsets_next_rec = offsets_tmp;
-            }
-        }
-
-        if (n_cols == dict_index_get_n_unique_in_tree(index)
-            && page_has_siblings(page)) {
-
-            /* If there is more than one leaf page in the tree,
-            we add one because we know that the first record
-            on the page certainly had a different prefix than the
-            last record on the previous index page in the
-            alphabetical order. Before this fix, if there was
-            just one big record on each clustered index page, the
-            algorithm grossly underestimated the number of rows
-            in the table. */
-
-            n_diff[n_cols - 1]++;
-        }
-
-        mtr_commit(&mtr);
-    }
-
-exit_loop:
-    /* If we saw k borders between different key values on
-    n_sample_pages leaf pages, we can estimate how many
-    there will be in index->stat_n_leaf_pages */
-
-    /* We must take into account that our sample actually represents
-    also the pages used for external storage of fields (those pages are
-    included in index->stat_n_leaf_pages) */
-
-    result.reserve(n_cols);
-
-    for (j = 0; j < n_cols; j++) {
-        index_field_stats_t stat;
-
-        stat.n_diff_key_vals
-            = BTR_TABLE_STATS_FROM_SAMPLE(
-                n_diff[j], index, n_sample_pages,
-                total_external_size, not_empty_flag);
-
-        /* If the tree is small, smaller than
-        10 * n_sample_pages + total_external_size, then
-        the above estimate is ok. For bigger trees it is common that we
-        do not see any borders between key values in the few pages
-        we pick. But still there may be n_sample_pages
-        different key values, or even more. Let us try to approximate
-        that: */
-
-        add_on = index->stat_n_leaf_pages
-            / (10 * (n_sample_pages
-                     + total_external_size));
-
-        if (add_on > n_sample_pages) {
-            add_on = n_sample_pages;
-        }
-
-        stat.n_diff_key_vals += add_on;
-
-        stat.n_sample_sizes = n_sample_pages;
-
-        if (n_not_null != NULL) {
-            stat.n_non_null_key_vals =
-                BTR_TABLE_STATS_FROM_SAMPLE(
-                    n_not_null[j], index, n_sample_pages,
-                    total_external_size, not_empty_flag);
-        }
-
-        result.push_back(stat);
-    }
-
-    mem_heap_free(heap);
-
-    return result;
-}
-
 /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/

 /***********************************************************//**
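The derivation in the comment above reduces to n_pages = S < I ? min(I, log2(I)*S) : I, with S = srv_stats_transient_sample_pages and I = index->stat_index_size. A minimal standalone sketch of that arithmetic follows; the function and parameter names are illustrative, not InnoDB's:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Sketch of the non-traditional sample-size formula derived above:
    // with S = configured sample pages and I = index size in pages,
    // n_pages = (S < I) ? min(I, log2(I) * S) : I, clamped to at least 1.
    static uint64_t sample_pages(uint64_t s /* S */, uint64_t i /* I */)
    {
        if (i <= 1)
            return 1;            // 0 or 1 index pages: one sample only
        if (s >= i)
            return i;            // case 2 in the derivation: take I
        return std::min<uint64_t>(
            i, uint64_t(std::log2(double(i)) * double(s)));  // min(I, L)
    }

    int main()
    {
        // For I = 10000 pages and S = 8, log2(I)*S is about 106 pages.
        std::printf("%llu\n", (unsigned long long) sample_pages(8, 10000));
    }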
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index d7466ae5f8a..f92beac0f67 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1024,6 +1024,367 @@ dict_stats_snapshot_free(
     dict_stats_table_clone_free(t);
 }

+/** Statistics for one field of an index. */
+struct index_field_stats_t
+{
+  ib_uint64_t n_diff_key_vals;
+  ib_uint64_t n_sample_sizes;
+  ib_uint64_t n_non_null_key_vals;
+
+  index_field_stats_t(ib_uint64_t n_diff_key_vals= 0,
+                      ib_uint64_t n_sample_sizes= 0,
+                      ib_uint64_t n_non_null_key_vals= 0)
+      : n_diff_key_vals(n_diff_key_vals), n_sample_sizes(n_sample_sizes),
+        n_non_null_key_vals(n_non_null_key_vals)
+  {
+  }
+};
+
+/*******************************************************************//**
+Record the number of non_null key values in a given index for
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are eventually stored in the array:
+index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */
+static
+void
+btr_record_not_null_field_in_rec(
+/*=============================*/
+    ulint           n_unique,   /*!< in: dict_index_get_n_unique(index),
+                                number of columns that uniquely
+                                determine an index entry */
+    const rec_offs* offsets,    /*!< in: rec_get_offsets(rec, index),
+                                its size could be for all fields or
+                                that of "n_unique" */
+    ib_uint64_t*    n_not_null) /*!< in/out: array to record number of
+                                not null rows for n-column prefix */
+{
+    ulint   i;
+
+    ut_ad(rec_offs_n_fields(offsets) >= n_unique);
+
+    if (n_not_null == NULL) {
+        return;
+    }
+
+    for (i = 0; i < n_unique; i++) {
+        if (rec_offs_nth_sql_null(offsets, i)) {
+            break;
+        }
+
+        n_not_null[i]++;
+    }
+}
+
+/** Estimated table level stats from sampled value.
+@param value     sampled stats
+@param index     index being sampled
+@param sample    number of sampled rows
+@param ext_size  external stored data size
+@param not_empty table not empty
+@return estimated table wide stats from sampled value */
+#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \
+    (((value) * static_cast<ib_uint64_t>(index->stat_n_leaf_pages) \
+      + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
+
+/** Estimates the number of different key values in a given index, for
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+result.n_sample_sizes[].
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and store the estimates in
+array result.n_non_null_key_vals.
+@param index       B-tree index
+@param bulk_trx_id the value of index->table->bulk_trx_id at the start
+@return vector with statistics information
+empty vector if the index is unavailable. */
+static
+std::vector<index_field_stats_t>
+btr_estimate_number_of_different_key_vals(dict_index_t* index,
+                                          trx_id_t bulk_trx_id)
+{
+    btr_cur_t       cursor;
+    page_t*         page;
+    rec_t*          rec;
+    ulint           n_cols;
+    ib_uint64_t*    n_diff;
+    ib_uint64_t*    n_not_null;
+    ibool           stats_null_not_equal;
+    uintmax_t       n_sample_pages=1; /* number of pages to sample */
+    ulint           not_empty_flag = 0;
+    ulint           total_external_size = 0;
+    ulint           i;
+    ulint           j;
+    uintmax_t       add_on;
+    mtr_t           mtr;
+    mem_heap_t*     heap = NULL;
+    rec_offs*       offsets_rec = NULL;
+    rec_offs*       offsets_next_rec = NULL;
+
+    std::vector<index_field_stats_t> result;
+
+    ut_ad(!index->is_spatial());
+
+    n_cols = dict_index_get_n_unique(index);
+
+    heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
+                           * n_cols
+                           + dict_index_get_n_fields(index)
+                           * (sizeof *offsets_rec
+                              + sizeof *offsets_next_rec));
+
+    n_diff = (ib_uint64_t*) mem_heap_zalloc(
+        heap, n_cols * sizeof(n_diff[0]));
+
+    n_not_null = NULL;
+
+    /* Check srv_innodb_stats_method setting, and decide whether we
+    need to record non-null value and also decide if NULL is
+    considered equal (by setting stats_null_not_equal value) */
+    switch (srv_innodb_stats_method) {
+    case SRV_STATS_NULLS_IGNORED:
+        n_not_null = (ib_uint64_t*) mem_heap_zalloc(
+            heap, n_cols * sizeof *n_not_null);
+        /* fall through */
+
+    case SRV_STATS_NULLS_UNEQUAL:
+        /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
+        case, we will treat NULLs as unequal value */
+        stats_null_not_equal = TRUE;
+        break;
+
+    case SRV_STATS_NULLS_EQUAL:
+        stats_null_not_equal = FALSE;
+        break;
+
+    default:
+        ut_error;
+    }
+
+    if (srv_stats_sample_traditional) {
+        /* It makes no sense to test more pages than are contained
+        in the index, thus we lower the number if it is too high */
+        if (srv_stats_transient_sample_pages > index->stat_index_size) {
+            if (index->stat_index_size > 0) {
+                n_sample_pages = index->stat_index_size;
+            }
+        } else {
+            n_sample_pages = srv_stats_transient_sample_pages;
+        }
+    } else {
+        /* New logarithmic number of pages that are estimated.
+        Number of pages estimated should be between 1 and
+        index->stat_index_size.
+
+        If we have only 0 or 1 index pages then we can only take 1
+        sample. We have already initialized n_sample_pages to 1.
+
+        So taking index size as I and sample as S and log(I)*S as L
+
+        requirement 1) we want the out limit of the expression to not exceed I;
+        requirement 2) we want the ideal pages to be at least S;
+        so the current expression is min(I, max( min(S,I), L)
+
+        looking for simplifications:
+
+        case 1: assume S < I
+        min(I, max( min(S,I), L) -> min(I , max( S, L))
+
+        but since L=LOG2(I)*S and log2(I) >=1   L>S always so max(S,L) = L.
+
+        so we have: min(I , L)
+
+        case 2: assume I < S
+        min(I, max( min(S,I), L) -> min(I, max( I, L))
+
+        case 2a: L > I
+        min(I, max( I, L)) -> min(I, L) -> I
+
+        case 2b: when L < I
+        min(I, max( I, L)) -> min(I, I ) -> I
+
+        so taking all case2 paths is I, our expression is:
+        n_pages = S < I? min(I,L) : I
+        */
+        if (index->stat_index_size > 1) {
+            n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size)
+                ? ut_min(index->stat_index_size,
+                         static_cast<ulint>(
+                             log2(double(index->stat_index_size))
+                             * double(srv_stats_transient_sample_pages)))
+                : index->stat_index_size;
+        }
+    }
+
+    /* Sanity check */
+    ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size));
+
+    /* We sample some pages in the index to get an estimate */
+
+    for (i = 0; i < n_sample_pages; i++) {
+        mtr.start();
+
+        bool    available;
+
+        available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF,
+                                            &cursor, &mtr);
+
+        if (!available || index->table->bulk_trx_id != bulk_trx_id) {
+            mtr.commit();
+            mem_heap_free(heap);
+
+            return result;
+        }
+
+        /* Count the number of different key values for each prefix of
+        the key on this index page. If the prefix does not determine
+        the index record uniquely in the B-tree, then we subtract one
+        because otherwise our algorithm would give a wrong estimate
+        for an index where there is just one key value. */
+
+        if (!index->is_readable()) {
+            mtr.commit();
+            goto exit_loop;
+        }
+
+        page = btr_cur_get_page(&cursor);
+
+        rec = page_rec_get_next(page_get_infimum_rec(page));
+        const ulint n_core = page_is_leaf(page)
+            ? index->n_core_fields : 0;
+
+        if (!page_rec_is_supremum(rec)) {
+            not_empty_flag = 1;
+            offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+                                          n_core,
+                                          ULINT_UNDEFINED, &heap);
+
+            if (n_not_null != NULL) {
+                btr_record_not_null_field_in_rec(
+                    n_cols, offsets_rec, n_not_null);
+            }
+        }
+
+        while (!page_rec_is_supremum(rec)) {
+            ulint   matched_fields;
+            rec_t*  next_rec = page_rec_get_next(rec);
+            if (page_rec_is_supremum(next_rec)) {
+                total_external_size +=
+                    btr_rec_get_externally_stored_len(
+                        rec, offsets_rec);
+                break;
+            }
+
+            offsets_next_rec = rec_get_offsets(next_rec, index,
+                                               offsets_next_rec,
+                                               n_core,
+                                               ULINT_UNDEFINED,
+                                               &heap);
+
+            cmp_rec_rec(rec, next_rec,
+                        offsets_rec, offsets_next_rec,
+                        index, stats_null_not_equal,
+                        &matched_fields);
+
+            for (j = matched_fields; j < n_cols; j++) {
+                /* We add one if this index record has
+                a different prefix from the previous */
+
+                n_diff[j]++;
+            }
+
+            if (n_not_null != NULL) {
+                btr_record_not_null_field_in_rec(
+                    n_cols, offsets_next_rec, n_not_null);
+            }
+
+            total_external_size
+                += btr_rec_get_externally_stored_len(
+                    rec, offsets_rec);
+
+            rec = next_rec;
+            /* Initialize offsets_rec for the next round
+            and assign the old offsets_rec buffer to
+            offsets_next_rec. */
+            {
+                rec_offs* offsets_tmp = offsets_rec;
+                offsets_rec = offsets_next_rec;
+                offsets_next_rec = offsets_tmp;
+            }
+        }
+
+        if (n_cols == dict_index_get_n_unique_in_tree(index)
+            && page_has_siblings(page)) {
+
+            /* If there is more than one leaf page in the tree,
+            we add one because we know that the first record
+            on the page certainly had a different prefix than the
+            last record on the previous index page in the
+            alphabetical order. Before this fix, if there was
+            just one big record on each clustered index page, the
+            algorithm grossly underestimated the number of rows
+            in the table. */
+
+            n_diff[n_cols - 1]++;
+        }
+
+        mtr.commit();
+    }
+
+exit_loop:
+    /* If we saw k borders between different key values on
+    n_sample_pages leaf pages, we can estimate how many
+    there will be in index->stat_n_leaf_pages */
+
+    /* We must take into account that our sample actually represents
+    also the pages used for external storage of fields (those pages are
+    included in index->stat_n_leaf_pages) */
+
+    result.reserve(n_cols);
+
+    for (j = 0; j < n_cols; j++) {
+        index_field_stats_t stat;
+
+        stat.n_diff_key_vals
+            = BTR_TABLE_STATS_FROM_SAMPLE(
+                n_diff[j], index, n_sample_pages,
+                total_external_size, not_empty_flag);
+
+        /* If the tree is small, smaller than
+        10 * n_sample_pages + total_external_size, then
+        the above estimate is ok. For bigger trees it is common that we
+        do not see any borders between key values in the few pages
+        we pick. But still there may be n_sample_pages
+        different key values, or even more. Let us try to approximate
+        that: */
+
+        add_on = index->stat_n_leaf_pages
+            / (10 * (n_sample_pages
+                     + total_external_size));
+
+        if (add_on > n_sample_pages) {
+            add_on = n_sample_pages;
+        }
+
+        stat.n_diff_key_vals += add_on;
+
+        stat.n_sample_sizes = n_sample_pages;
+
+        if (n_not_null != NULL) {
+            stat.n_non_null_key_vals =
+                BTR_TABLE_STATS_FROM_SAMPLE(
+                    n_not_null[j], index, n_sample_pages,
+                    total_external_size, not_empty_flag);
+        }
+
+        result.push_back(stat);
+    }
+
+    mem_heap_free(heap);
+
+    return result;
+}
+
 /*********************************************************************//**
 Calculates new estimates for index statistics. This function is
 relatively quick and is used to calculate transient statistics that
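For reference, the BTR_TABLE_STATS_FROM_SAMPLE macro above is plain proportional scaling: a counter observed while reading (sample + ext_size) pages is extrapolated to stat_n_leaf_pages, with sample - 1 + ext_size + not_empty added to the numerator so the division rounds the estimate up. A self-contained sketch with made-up numbers (the function name is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Same arithmetic as BTR_TABLE_STATS_FROM_SAMPLE above: scale a value
    // seen on `sample` pages up to `leaf_pages`, counting externally
    // stored pages (`ext_size`) as part of the sample.
    static uint64_t stats_from_sample(uint64_t value, uint64_t leaf_pages,
                                      uint64_t sample, uint64_t ext_size,
                                      uint64_t not_empty)
    {
        return (value * leaf_pages + sample - 1 + ext_size + not_empty)
               / (sample + ext_size);
    }

    int main()
    {
        // 42 distinct-key borders seen on 10 sampled pages of a
        // 1000-page leaf level, no external pages: estimate 4201.
        std::printf("%llu\n", (unsigned long long)
                    stats_from_sample(42, 1000, 10, 0, 1));
    }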
@@ -1054,39 +1415,48 @@ dummy_empty:
     } else if (ibuf_debug && !dict_index_is_clust(index)) {
         goto dummy_empty;
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+    } else if (dict_index_is_online_ddl(index) || !index->is_committed()
+               || !index->table->space) {
+        goto dummy_empty;
     } else {
         mtr_t   mtr;
-        ulint   size;

         mtr.start();
         mtr_s_lock_index(index, &mtr);
-        size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
-
-        if (size != ULINT_UNDEFINED) {
-            index->stat_index_size = size;
-
-            size = btr_get_size(
-                index, BTR_N_LEAF_PAGES, &mtr);
+        buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH,
+                                               &mtr);
+        if (!root) {
+invalid:
+            mtr.commit();
+            goto dummy_empty;
         }

+        const auto bulk_trx_id = index->table->bulk_trx_id;
+        if (bulk_trx_id && trx_sys.find(nullptr, bulk_trx_id, false)) {
+            goto invalid;
+        }
+
+        mtr.x_lock_space(index->table->space);
+
+        ulint dummy, size;
+        index->stat_index_size
+            = fseg_n_reserved_pages(*root, PAGE_HEADER
+                                    + PAGE_BTR_SEG_LEAF
+                                    + root->frame, &size, &mtr)
+            + fseg_n_reserved_pages(*root, PAGE_HEADER
+                                    + PAGE_BTR_SEG_TOP
+                                    + root->frame, &dummy, &mtr);
+
         mtr.commit();

-        switch (size) {
-        case ULINT_UNDEFINED:
-            goto dummy_empty;
-        case 0:
-            /* The root node of the tree is a leaf */
-            size = 1;
-        }
-
-        index->stat_n_leaf_pages = size;
+        index->stat_n_leaf_pages = size ? size : 1;

         /* Do not continue if table decryption has failed or
         table is already marked as corrupted. */
         if (index->is_readable()) {
             std::vector<index_field_stats_t> stats
                 = btr_estimate_number_of_different_key_vals(
-                    index);
+                    index, bulk_trx_id);

             if (!stats.empty()) {
                 index->table->stats_mutex_lock();
@@ -2037,7 +2407,7 @@ struct index_stats_t {
         stats.reserve(n_uniq);

         for (ulint i= 0; i < n_uniq; ++i)
-            stats.push_back(index_field_stats_t(0, 1, 0));
+            stats.push_back(index_field_stats_t{0, 1, 0});
     }
 };

@@ -2123,15 +2493,12 @@ stat_n_leaf_pages. This function can be slow.
 @return index stats */
 static index_stats_t dict_stats_analyze_index(dict_index_t* index)
 {
-    ulint           root_level;
-    ulint           level;
     bool            level_is_analyzed;
     ulint           n_uniq;
     ulint           n_prefix;
     ib_uint64_t     total_recs;
     ib_uint64_t     total_pages;
     mtr_t           mtr;
-    ulint           size;
     index_stats_t   result(index->n_uniq);
     DBUG_ENTER("dict_stats_analyze_index");
@@ -2150,30 +2517,43 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
     mtr.start();
     mtr_s_lock_index(index, &mtr);
-    size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+    uint16_t root_level;

-    if (size != ULINT_UNDEFINED) {
-        result.index_size = size;
-        size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
+    {
+        buf_block_t* root;
+        root = btr_root_block_get(index, RW_SX_LATCH, &mtr);
+        if (!root) {
+empty_index:
+            mtr.commit();
+            dict_stats_assert_initialized_index(index);
+            DBUG_RETURN(result);
+        }
+
+        root_level = btr_page_get_level(root->frame);
+
+        mtr.x_lock_space(index->table->space);
+        ulint dummy, size;
+        result.index_size
+            = fseg_n_reserved_pages(*root, PAGE_HEADER
+                                    + PAGE_BTR_SEG_LEAF
+                                    + root->frame, &size, &mtr)
+            + fseg_n_reserved_pages(*root, PAGE_HEADER
+                                    + PAGE_BTR_SEG_TOP
+                                    + root->frame, &dummy, &mtr);
+        result.n_leaf_pages = size ? size : 1;
+    }
+
+    const auto bulk_trx_id = index->table->bulk_trx_id;
+    if (bulk_trx_id && trx_sys.find(nullptr, bulk_trx_id, false)) {
+        result.index_size = 1;
+        result.n_leaf_pages = 1;
+        goto empty_index;
     }

-    /* Release the X locks on the root page taken by btr_get_size() */
     mtr.commit();

-    switch (size) {
-    case ULINT_UNDEFINED:
-        dict_stats_assert_initialized_index(index);
-        DBUG_RETURN(result);
-    case 0:
-        /* The root node of the tree is a leaf */
-        size = 1;
-    }
-
-    result.n_leaf_pages = size;
-
     mtr.start();
     mtr_sx_lock_index(index, &mtr);
-    root_level = btr_height_get(index, &mtr);

     n_uniq = dict_index_get_n_unique(index);
@@ -2251,7 +2631,7 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
     So if we find that the first level containing D distinct
     keys (on n_prefix columns) is L, we continue from L when
     searching for D distinct keys on n_prefix-1 columns. */
-    level = root_level;
+    auto level = root_level;
     level_is_analyzed = false;

     for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
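The hunks above capture root_level and bulk_trx_id before the scan; the hunk that follows re-checks them once the latches have been released and re-acquired, and discards the scan if either changed. A minimal sketch of that revalidation pattern, with illustrative stand-in types rather than InnoDB's:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the values the code above snapshots
    // before releasing latches and re-reads afterwards.
    struct tree_snapshot {
        uint16_t root_level;   // btr_page_get_level(root->frame) above
        uint64_t bulk_trx_id;  // index->table->bulk_trx_id above
    };

    // A scan is only trusted if neither value changed in between;
    // otherwise the previously stored statistics are kept.
    static bool scan_still_valid(const tree_snapshot& before,
                                 const tree_snapshot& now)
    {
        return before.root_level == now.root_level
            && before.bulk_trx_id == now.bulk_trx_id;
    }

    int main()
    {
        tree_snapshot before{3, 42}, now{3, 45};  // bulk insert committed
        std::printf("%s\n",
                    scan_still_valid(before, now) ? "keep" : "discard");
    }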
@@ -2265,7 +2645,10 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
         mtr.commit();
         mtr.start();
         mtr_sx_lock_index(index, &mtr);
-        if (root_level != btr_height_get(index, &mtr)) {
+        buf_block_t *root = btr_root_block_get(index, RW_S_LATCH,
+                                               &mtr);
+        if (!root || root_level != btr_page_get_level(root->frame)
+            || index->table->bulk_trx_id != bulk_trx_id) {
             /* Just quit if the tree has changed beyond
             recognition here. The old stats from previous
             runs will remain in the values that we have
@@ -2277,6 +2660,8 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
             break;
         }

+        mtr.memo_release(root, MTR_MEMO_PAGE_S_FIX);
+
         /* check whether we should pick the current level;
         we pick level 1 even if it does not have enough
         distinct records because we do not want to scan the
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 4543fc21e8a..40293df21e5 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -196,18 +196,6 @@ btr_root_adjust_on_import(
     const dict_index_t* index)  /*!< in: index tree */
     MY_ATTRIBUTE((warn_unused_result));

-/**************************************************************//**
-Gets the height of the B-tree (the level of the root, when the leaf
-level is assumed to be 0). The caller must hold an S or X latch on
-the index.
-@return tree height (level of the root) */
-ulint
-btr_height_get(
-/*===========*/
-    const dict_index_t* index,  /*!< in: index tree */
-    mtr_t*              mtr)    /*!< in/out: mini-transaction */
-    MY_ATTRIBUTE((warn_unused_result));
-
 /** Get an index page and declare its latching order level.
 @param[in]  index   index tree
 @param[in]  page    page number
@@ -537,17 +525,6 @@ btr_discard_page(
     btr_cur_t*  cursor, /*!< in: cursor on the page to discard: not on
                         the root page */
     mtr_t*      mtr);   /*!< in: mtr */
-/**************************************************************//**
-Gets the number of pages in a B-tree.
-@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
-ulint
-btr_get_size(
-/*=========*/
-    const dict_index_t* index,  /*!< in: index */
-    ulint       flag,   /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-    mtr_t*      mtr)    /*!< in/out: mini-transaction where index
-                        is s-latched */
-    MY_ATTRIBUTE((warn_unused_result));

 /**************************************************************//**
 Allocates a new file page to be used in an index tree. NOTE: we assume
@@ -614,7 +591,6 @@ btr_root_block_get(
     rw_lock_type_t      mode,   /*!< in: either RW_S_LATCH
                                 or RW_X_LATCH */
     mtr_t*              mtr);   /*!< in: mtr */
-
 /*************************************************************//**
 Reorganizes an index page.
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index c7f25aff4b7..911c79c29ba 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -575,37 +575,6 @@ btr_estimate_n_rows_in_range(
     btr_pos_t*  range_start,
     btr_pos_t*  range_end);

-
-/** Statistics for one field of an index. */
-struct index_field_stats_t
-{
-  ib_uint64_t n_diff_key_vals;
-  ib_uint64_t n_sample_sizes;
-  ib_uint64_t n_non_null_key_vals;
-
-  index_field_stats_t(ib_uint64_t n_diff_key_vals= 0,
-                      ib_uint64_t n_sample_sizes= 0,
-                      ib_uint64_t n_non_null_key_vals= 0)
-      : n_diff_key_vals(n_diff_key_vals), n_sample_sizes(n_sample_sizes),
-        n_non_null_key_vals(n_non_null_key_vals)
-  {
-  }
-};
-
-/** Estimates the number of different key values in a given index, for
-each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
-0..n_uniq-1) and the number of pages that were sampled is saved in
-index->stat_n_sample_sizes[].
-If innodb_stats_method is nulls_ignored, we also record the number of
-non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals.
-@param[in]  index   index
-@return stat vector if the index is available and we get the estimated numbers,
-empty vector if the index is unavailable. */
-std::vector<index_field_stats_t>
-btr_estimate_number_of_different_key_vals(dict_index_t* index);
-
 /** Gets the externally stored size of a record, in units of a database page.
 @param[in]  rec     record
 @param[in]  offsets array returned by rec_get_offsets()

From 03a10706ecfa5cf0a6252d436e341483f5c0827c Mon Sep 17 00:00:00 2001
From: Monty
Date: Fri, 17 Sep 2021 16:07:00 +0300
Subject: [PATCH 29/29] Fixed alias.test to also work with ps

The issue is that max_length for prepared statements differs from that
of normal queries, which can optimize max_length based on the actual
result length.

---
 mysql-test/main/alias.result | 12 ++++++------
 mysql-test/main/alias.test   |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mysql-test/main/alias.result b/mysql-test/main/alias.result
index d8bb211539f..0d14607f613 100644
--- a/mysql-test/main/alias.result
+++ b/mysql-test/main/alias.result
@@ -223,16 +223,16 @@ disconnect c1;
 #
 create or replace table t1 (a int);
 create or replace table t2 (b int);
-insert into t1 values(1),(2);
+insert into t1 values(1<<30),(1<<29);
 insert into t2 values(1),(2);
 select t1.a as a1 from t1 as t1,t2 order by t2.b,t1.a;
 Catalog	Database	Table	Table_alias	Column	Column_alias	Type	Length	Max length	Is_null	Flags	Decimals	Charsetnr
-def	test	t1	t1	a	a1	3	11	1	Y	32768	0	63
+def	test	t1	t1	a	a1	3	11	10	Y	32768	0	63
 a1
-1
-2
-1
-2
+536870912
+1073741824
+536870912
+1073741824
 drop table t1,t2;
 #
 # End of 10.4 tests
diff --git a/mysql-test/main/alias.test b/mysql-test/main/alias.test
index f0c4e13abfd..2408509ba10 100644
--- a/mysql-test/main/alias.test
+++ b/mysql-test/main/alias.test
@@ -231,7 +231,7 @@ disconnect c1;
 --echo #
 create or replace table t1 (a int);
 create or replace table t2 (b int);
-insert into t1 values(1),(2);
+insert into t1 values(1<<30),(1<<29);
 insert into t2 values(1),(2);
 --enable_metadata
 select t1.a as a1 from t1 as t1,t2 order by t2.b,t1.a;
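The new test values are chosen so that the reported metadata no longer depends on the protocol: with 1<<30 stored in the table, even a protocol that shrinks "Max length" to the widest actual result still reports the full 10 characters. A small standalone sketch of that arithmetic, independent of the server code:

    #include <cstdio>

    int main()
    {
        // snprintf(NULL, 0, ...) returns the number of characters needed:
        // 1 << 30 = 1073741824 prints as 10 characters, while the old
        // values 1 and 2 print as a single character, which allowed the
        // text protocol to report a smaller "Max length" than the
        // prepared-statement protocol.
        std::printf("%d %d\n",
                    std::snprintf(nullptr, 0, "%d", 1 << 30),
                    std::snprintf(nullptr, 0, "%d", 2));  // prints "10 1"
    }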