From 445e518bc7aa5f4bb48d98e798905c9c0b0ce673 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sat, 27 Jan 2018 10:18:20 +0000 Subject: [PATCH 01/54] Copy of commit f8f364b47f2784f16b401f27658f1c16eaf348ec Author: Jay Edgar Date: Tue Oct 17 15:19:31 2017 -0700 Add a hashed, hierarchical, wheel timer implementation Summary: In order to implement idle timeouts on detached sessions we need something inside MySQL that is lightweight and can handle calling events in the future with very little cost for cancelling or resetting the event. A hashed, hi By default the timers are grouped into 10ms buckets (the 'hashed' part), though the size of the buckets is configurable at the creation of the timer. Each wheel (the 'wheel' part) maintains 256 buckets and cascades to the whe Reviewed By: djwatson Differential Revision: D6199806 fbshipit-source-id: 5e1590f --- storage/rocksdb/CMakeLists.txt | 21 +- storage/rocksdb/event_listener.cc | 10 + storage/rocksdb/event_listener.h | 3 + storage/rocksdb/ha_rocksdb.cc | 1884 ++++++++++++----- storage/rocksdb/ha_rocksdb.h | 102 +- storage/rocksdb/myrocks_hotbackup | 686 ++++++ .../rocksdb/include/autoinc_crash_safe.inc | 150 ++ .../rocksdb/{t => include}/bulk_load.inc | 12 +- .../rocksdb/include/bulk_load_unsorted.inc | 143 ++ .../r/add_index_inplace_sstfilewriter.result | 4 + .../r/allow_no_primary_key_with_sk.result | 17 + .../r/allow_to_start_after_corruption.result | 35 + .../rocksdb/r/autoinc_crash_safe.result | 113 + .../r/autoinc_crash_safe_partition.result | 113 + .../mysql-test/rocksdb/r/autoinc_debug.result | 107 + .../mysql-test/rocksdb/r/autoinc_vars.result | 79 + .../mysql-test/rocksdb/r/autoincrement.result | 1 - .../mysql-test/rocksdb/r/bulk_load.result | 7 +- .../rocksdb/r/bulk_load_drop_table.result | 8 + .../rocksdb/r/bulk_load_errors.result | 51 +- .../rocksdb/r/bulk_load_rev_cf.result | 7 +- .../r/bulk_load_rev_cf_and_data.result | 7 +- .../rocksdb/r/bulk_load_rev_data.result | 7 +- 
.../rocksdb/r/bulk_load_unsorted.result | 58 +- .../rocksdb/r/bulk_load_unsorted_rev.result | 117 + .../mysql-test/rocksdb/r/cardinality.result | 35 + .../r/check_ignore_unknown_options.result | 6 + .../rocksdb/r/col_opt_not_null.result | 32 +- .../mysql-test/rocksdb/r/col_opt_null.result | 32 +- .../rocksdb/r/col_opt_unsigned.result | 32 +- .../rocksdb/r/col_opt_zerofill.result | 32 +- .../rocksdb/r/deadlock_tracking.result | 67 +- .../mysql-test/rocksdb/r/i_s_ddl.result | 19 +- .../mysql-test/rocksdb/r/i_s_deadlock.result | 172 ++ .../rocksdb/r/index_merge_rocksdb.result | 2 +- .../mysql-test/rocksdb/r/issue255.result | 47 + .../rocksdb/r/lock_wait_timeout_stats.result | 8 + .../rocksdb/r/max_open_files.result | 20 + .../r/optimizer_loose_index_scans.result | 42 +- .../mysql-test/rocksdb/r/perf_context.result | 32 +- .../mysql-test/rocksdb/r/rocksdb.result | 91 +- .../mysql-test/rocksdb/r/rocksdb_debug.result | 11 + .../rocksdb/r/rocksdb_range2.result | 2 +- .../mysql-test/rocksdb/r/show_engine.result | 36 +- .../rocksdb/r/skip_validate_tmp_table.result | 17 +- .../r/ttl_primary_read_filtering.result | 28 + .../mysql-test/rocksdb/r/type_float.result | 32 +- .../mysql-test/rocksdb/r/type_varchar.result | 14 +- .../rocksdb/r/use_direct_reads_writes.result | 11 +- .../mysql-test/rocksdb/r/write_sync.result | 5 +- .../t/add_index_inplace_sstfilewriter.test | 5 + .../t/allow_no_primary_key_with_sk.test | 12 + .../t/allow_to_start_after_corruption.test | 75 + .../rocksdb/t/autoinc_crash_safe.cnf | 8 + .../rocksdb/t/autoinc_crash_safe.test | 9 + .../t/autoinc_crash_safe_partition.cnf | 8 + .../t/autoinc_crash_safe_partition.test | 10 + .../rocksdb/t/autoinc_debug-master.opt | 1 + .../mysql-test/rocksdb/t/autoinc_debug.test | 118 ++ .../mysql-test/rocksdb/t/autoinc_vars.test | 38 + .../mysql-test/rocksdb/t/autoincrement.test | 3 - .../mysql-test/rocksdb/t/bulk_load.test | 2 +- .../rocksdb/t/bulk_load_drop_table.test | 19 + .../rocksdb/t/bulk_load_errors.test | 99 +- 
.../rocksdb/t/bulk_load_rev_cf.test | 2 +- .../rocksdb/t/bulk_load_rev_cf_and_data.test | 2 +- .../rocksdb/t/bulk_load_rev_data.test | 2 +- .../rocksdb/t/bulk_load_unsorted.test | 133 +- .../rocksdb/t/bulk_load_unsorted_errors.test | 1 - .../rocksdb/t/bulk_load_unsorted_rev.test | 5 + .../mysql-test/rocksdb/t/cardinality.test | 42 + .../t/check_ignore_unknown_options.test | 21 + .../rocksdb/t/deadlock_tracking.test | 20 +- .../rocksdb/mysql-test/rocksdb/t/i_s_ddl.test | 7 +- .../mysql-test/rocksdb/t/i_s_deadlock.test | 158 ++ .../rocksdb/t/index_merge_rocksdb.test | 7 +- .../rocksdb/t/index_merge_rocksdb2.test | 4 +- .../t/insert_optimized_config-master.opt | 1 + .../mysql-test/rocksdb/t/issue255.test | 32 + .../rocksdb/t/lock_wait_timeout_stats.test | 4 + .../mysql-test/rocksdb/t/max_open_files.test | 53 + .../mysql-test/rocksdb/t/mysqldump.test | 8 +- .../mysql-test/rocksdb/t/mysqldump2.test | 2 +- .../rocksdb/mysql-test/rocksdb/t/rocksdb.test | 10 +- .../mysql-test/rocksdb/t/rocksdb_debug.test | 14 + .../mysql-test/rocksdb/t/rocksdb_range2.test | 1 + .../rocksdb/t/skip_validate_tmp_table.test | 35 +- .../rocksdb/t/ttl_primary_read_filtering.test | 19 +- .../mysql-test/rocksdb/t/type_float.inc | 13 +- .../mysql-test/rocksdb/t/type_varchar.test | 15 +- .../rocksdb/t/use_direct_reads_writes.test | 80 +- .../mysql-test/rocksdb/t/write_sync.test | 9 +- .../include/rpl_no_unique_check_on_lag.inc | 1 + .../r/singledelete_idempotent_recovery.result | 1 + .../rocksdb_rpl/t/multiclient_2pc.test | 1 + .../t/singledelete_idempotent_recovery.test | 6 + .../rocksdb_stress/r/rocksdb_stress.result | 2 + .../r/rocksdb_stress_crash.result | 2 + .../rocksdb_stress/t/load_generator.py | 13 + .../rocksdb_stress/t/rocksdb_stress.test | 2 + .../t/rocksdb_stress_crash.test | 2 + ...low_to_start_after_corruption_basic.result | 7 + .../r/rocksdb_bytes_per_sync_basic.result | 84 +- ...sdb_flush_memtable_on_analyze_basic.result | 58 - ...ocksdb_ignore_unknown_options_basic.result | 14 + 
.../r/rocksdb_max_open_files_basic.result | 10 +- .../r/rocksdb_max_row_locks_basic.result | 18 +- ... => rocksdb_two_write_queues_basic.result} | 8 +- .../r/rocksdb_update_cf_options.result | 38 + .../r/rocksdb_update_cf_options_basic.result | 18 +- .../r/rocksdb_wal_bytes_per_sync_basic.result | 84 +- ...allow_to_start_after_corruption_basic.test | 6 + .../t/rocksdb_bytes_per_sync_basic.test | 17 +- ...cksdb_flush_memtable_on_analyze_basic.test | 46 - .../rocksdb_ignore_unknown_options_basic.test | 16 + .../t/rocksdb_max_open_files_basic.test | 10 +- ...st => rocksdb_two_write_queues_basic.test} | 2 +- .../t/rocksdb_update_cf_options.test | 22 + .../t/rocksdb_update_cf_options_basic.test | 19 +- .../t/rocksdb_wal_bytes_per_sync_basic.test | 18 +- storage/rocksdb/properties_collector.cc | 145 +- storage/rocksdb/properties_collector.h | 41 +- storage/rocksdb/rdb_buff.h | 2 +- storage/rocksdb/rdb_cf_options.cc | 8 + storage/rocksdb/rdb_cf_options.h | 3 + storage/rocksdb/rdb_compact_filter.h | 2 +- storage/rocksdb/rdb_datadic.cc | 196 +- storage/rocksdb/rdb_datadic.h | 145 +- storage/rocksdb/rdb_i_s.cc | 149 +- storage/rocksdb/rdb_i_s.h | 1 + storage/rocksdb/rdb_io_watchdog.cc | 2 +- storage/rocksdb/rdb_perf_context.cc | 31 +- storage/rocksdb/rdb_perf_context.h | 14 + storage/rocksdb/rdb_psi.cc | 3 +- storage/rocksdb/rdb_psi.h | 3 +- storage/rocksdb/rdb_sst_info.cc | 38 +- storage/rocksdb/rdb_sst_info.h | 6 +- storage/rocksdb/rdb_utils.cc | 26 + storage/rocksdb/rdb_utils.h | 12 +- 139 files changed, 5711 insertions(+), 1315 deletions(-) create mode 100755 storage/rocksdb/myrocks_hotbackup create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc rename storage/rocksdb/mysql-test/rocksdb/{t => include}/bulk_load.inc (94%) create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result delete mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test delete mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result delete mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result rename storage/rocksdb/mysql-test/rocksdb_sys_vars/r/{rocksdb_concurrent_prepare_basic.result => rocksdb_two_write_queues_basic.result} (54%) create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test delete mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test rename storage/rocksdb/mysql-test/rocksdb_sys_vars/t/{rocksdb_concurrent_prepare_basic.test => rocksdb_two_write_queues_basic.test} (90%) create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index 9e6c8160e32..a5ef5651e1f 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -81,34 +81,33 @@ ELSE() ENDIF() SET(rocksdb_static_libs ) -IF (NOT "$ENV{WITH_SNAPPY}" STREQUAL "") +IF (WITH_SNAPPY) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_SNAPPY}/libsnappy${PIC_EXT}.a) + ${WITH_SNAPPY}/lib/libsnappy${PIC_EXT}.a) ADD_DEFINITIONS(-DSNAPPY) ENDIF() -IF (NOT "$ENV{WITH_LZ4}" STREQUAL "") +IF (WITH_LZ4) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_LZ4}/liblz4${PIC_EXT}.a) + ${WITH_LZ4}/lib/liblz4${PIC_EXT}.a) ADD_DEFINITIONS(-DLZ4) ENDIF() -IF (NOT "$ENV{WITH_BZ2}" STREQUAL "") +IF (WITH_BZ2) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_BZ2}/libbz2${PIC_EXT}.a) + ${WITH_BZ2}/lib/libbz2${PIC_EXT}.a) 
ADD_DEFINITIONS(-DBZIP2) ENDIF() -# link ZSTD only if instructed -IF (NOT "$ENV{WITH_ZSTD}" STREQUAL "") +IF (WITH_ZSTD) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_ZSTD}/libzstd${PIC_EXT}.a) + ${WITH_ZSTD}/lib/libzstd${PIC_EXT}.a) ADD_DEFINITIONS(-DZSTD) ENDIF() -IF (NOT "$ENV{WITH_TBB}" STREQUAL "") +IF (WITH_TBB) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_TBB}/libtbb${PIC_EXT}.a) + ${WITH_TBB}/lib/libtbb${PIC_EXT}.a) ADD_DEFINITIONS(-DTBB) ENDIF() diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc index 04c433acabe..2da77a16f68 100644 --- a/storage/rocksdb/event_listener.cc +++ b/storage/rocksdb/event_listener.cc @@ -81,4 +81,14 @@ void Rdb_event_listener::OnExternalFileIngested( DBUG_ASSERT(db != nullptr); update_index_stats(info.table_properties); } + +void Rdb_event_listener::OnBackgroundError( + rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) { + rdb_log_status_error(*status, "Error detected in background"); + sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason); + if (status->IsCorruption()) { + rdb_persist_corruption_marker(); + abort(); + } +} } // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h index d535031644b..8772105de36 100644 --- a/storage/rocksdb/event_listener.h +++ b/storage/rocksdb/event_listener.h @@ -37,6 +37,9 @@ public: rocksdb::DB *db, const rocksdb::ExternalFileIngestionInfo &ingestion_info) override; + void OnBackgroundError(rocksdb::BackgroundErrorReason reason, + rocksdb::Status *status) override; + private: Rdb_ddl_manager *m_ddl_manager; diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index cc52b343dd1..961ca5d1d17 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -126,10 +126,6 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton, my_core::TABLE_SHARE *table_arg, my_core::MEM_ROOT *mem_root); -bool 
can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, - const rocksdb::Slice &eq_cond, - const bool use_all_keys); - static rocksdb::CompactRangeOptions getCompactRangeOptions() { rocksdb::CompactRangeOptions compact_range_options; compact_range_options.bottommost_level_compaction = @@ -408,24 +404,37 @@ static void rocksdb_set_collation_exception_list(THD *thd, void *var_ptr, const void *save); -void rocksdb_set_update_cf_options(THD *thd, - struct st_mysql_sys_var *var, - void *var_ptr, - const void *save); +static int rocksdb_validate_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *save, + st_mysql_value *value); -static void -rocksdb_set_bulk_load(THD *thd, - struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), - void *var_ptr, const void *save); +static void rocksdb_set_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); -static void rocksdb_set_bulk_load_allow_unsorted( - THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), - void *var_ptr, const void *save); +static int rocksdb_check_bulk_load(THD *const thd, + struct st_mysql_sys_var *var + MY_ATTRIBUTE((__unused__)), + void *save, + struct st_mysql_value *value); + +static int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); static void rocksdb_set_max_background_jobs(THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr, const void *const save); +static void rocksdb_set_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); +static void rocksdb_set_wal_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); ////////////////////////////////////////////////////////////////////////////// // Options definitions ////////////////////////////////////////////////////////////////////////////// @@ 
-456,6 +465,7 @@ static char *rocksdb_compact_cf_name; static char *rocksdb_checkpoint_name; static my_bool rocksdb_signal_drop_index_thread; static my_bool rocksdb_strict_collation_check = 1; +static my_bool rocksdb_ignore_unknown_options = 1; static my_bool rocksdb_enable_2pc = 0; static char *rocksdb_strict_collation_exceptions; static my_bool rocksdb_collect_sst_properties = 1; @@ -469,7 +479,6 @@ static int rocksdb_debug_ttl_read_filter_ts = 0; static my_bool rocksdb_debug_ttl_ignore_pk = 0; static my_bool rocksdb_reset_stats = 0; static uint32_t rocksdb_io_write_timeout_secs = 0; -static uint64_t rocksdb_number_stat_computes = 0; static uint32_t rocksdb_seconds_between_stat_computes = 3600; static long long rocksdb_compaction_sequential_deletes = 0l; static long long rocksdb_compaction_sequential_deletes_window = 0l; @@ -480,7 +489,10 @@ static uint32_t rocksdb_table_stats_sampling_pct; static my_bool rocksdb_enable_bulk_load_api = 1; static my_bool rocksdb_print_snapshot_conflict_queries = 0; static my_bool rocksdb_large_prefix = 0; +static my_bool rocksdb_allow_to_start_after_corruption = 0; +std::atomic rocksdb_row_lock_deadlocks(0); +std::atomic rocksdb_row_lock_wait_timeouts(0); std::atomic rocksdb_snapshot_conflict_errors(0); std::atomic rocksdb_wal_group_syncs(0); @@ -491,8 +503,9 @@ static std::unique_ptr rdb_init_rocksdb_db_options(void) { o->listeners.push_back(std::make_shared(&ddl_manager)); o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; + o->max_open_files = -2; // auto-tune to 50% open_files_limit - o->concurrent_prepare = true; + o->two_write_queues = true; o->manual_wal_flush = true; return o; } @@ -571,6 +584,33 @@ static void rocksdb_set_io_write_timeout( RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } +enum rocksdb_flush_log_at_trx_commit_type : unsigned int { + FLUSH_LOG_NEVER = 0, + FLUSH_LOG_SYNC, + FLUSH_LOG_BACKGROUND, + FLUSH_LOG_MAX /* must be last */ +}; + +static int 
rocksdb_validate_flush_log_at_trx_commit( + THD *const thd, + struct st_mysql_sys_var *const var, /* in: pointer to system variable */ + void *var_ptr, /* out: immediate result for update function */ + struct st_mysql_value *const value /* in: incoming value */) { + long long new_value; + + /* value is NULL */ + if (value->val_int(value, &new_value)) { + return HA_EXIT_FAILURE; + } + + if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) { + return HA_EXIT_FAILURE; + } + + *static_cast(var_ptr) = static_cast(new_value); + return HA_EXIT_SUCCESS; +} + static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS}; static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, @@ -578,7 +618,7 @@ static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, nullptr}; const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024; -const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024; +const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024; const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000; const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024; const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024; @@ -618,12 +658,13 @@ static MYSQL_THDVAR_BOOL( bulk_load, PLUGIN_VAR_RQCMDARG, "Use bulk-load mode for inserts. This disables " "unique_checks and enables rocksdb_commit_in_the_middle.", - nullptr, rocksdb_set_bulk_load, FALSE); + rocksdb_check_bulk_load, nullptr, FALSE); static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG, "Allow unsorted input during bulk-load. 
" "Can be changed only when bulk load is disabled.", - nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE); + rocksdb_check_bulk_load_allow_unsorted, nullptr, + FALSE); static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -724,11 +765,11 @@ static MYSQL_SYSVAR_BOOL( rocksdb_db_options->create_if_missing); static MYSQL_SYSVAR_BOOL( - concurrent_prepare, - *reinterpret_cast(&rocksdb_db_options->concurrent_prepare), + two_write_queues, + *reinterpret_cast(&rocksdb_db_options->two_write_queues), PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr, - rocksdb_db_options->concurrent_prepare); + "DBOptions::two_write_queues for RocksDB", nullptr, nullptr, + rocksdb_db_options->two_write_queues); static MYSQL_SYSVAR_BOOL( manual_wal_flush, @@ -855,7 +896,7 @@ static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "DBOptions::max_open_files for RocksDB", nullptr, nullptr, rocksdb_db_options->max_open_files, - /* min */ -1, /* max */ INT_MAX, 0); + /* min */ -2, /* max */ INT_MAX, 0); static MYSQL_SYSVAR_ULONG(max_total_wal_size, rocksdb_db_options->max_total_wal_size, @@ -1037,16 +1078,18 @@ static MYSQL_SYSVAR_BOOL( rocksdb_db_options->use_adaptive_mutex); static MYSQL_SYSVAR_ULONG(bytes_per_sync, rocksdb_db_options->bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG, "DBOptions::bytes_per_sync for RocksDB", nullptr, - nullptr, rocksdb_db_options->bytes_per_sync, + rocksdb_set_bytes_per_sync, + rocksdb_db_options->bytes_per_sync, /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_ULONG(wal_bytes_per_sync, rocksdb_db_options->wal_bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG, "DBOptions::wal_bytes_per_sync for RocksDB", nullptr, - nullptr, rocksdb_db_options->wal_bytes_per_sync, + rocksdb_set_wal_bytes_per_sync, 
+ rocksdb_db_options->wal_bytes_per_sync, /* min */ 0L, /* max */ LONG_MAX, 0); static MYSQL_SYSVAR_BOOL( @@ -1164,22 +1207,17 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options, static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_ALLOCATED, - "Option updates per column family for RocksDB", nullptr, + "Option updates per column family for RocksDB", + rocksdb_validate_update_cf_options, rocksdb_set_update_cf_options, nullptr); -enum rocksdb_flush_log_at_trx_commit_type : unsigned int { - FLUSH_LOG_NEVER = 0, - FLUSH_LOG_SYNC, - FLUSH_LOG_BACKGROUND, - FLUSH_LOG_MAX /* must be last */ -}; - static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit, rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG, "Sync on transaction commit. Similar to " "innodb_flush_log_at_trx_commit. 1: sync on commit, " "0,2: not sync on commit", - nullptr, nullptr, /* default */ FLUSH_LOG_SYNC, + rocksdb_validate_flush_log_at_trx_commit, nullptr, + /* default */ FLUSH_LOG_SYNC, /* min */ FLUSH_LOG_NEVER, /* max */ FLUSH_LOG_BACKGROUND, 0); @@ -1320,6 +1358,11 @@ static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG, "Enable two phase commit for MyRocks", nullptr, nullptr, TRUE); +static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable ignoring unknown options passed to RocksDB", + nullptr, nullptr, TRUE); + static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check, PLUGIN_VAR_RQCMDARG, "Enforce case sensitive collation for MyRocks indexes", @@ -1351,11 +1394,6 @@ static MYSQL_SYSVAR_BOOL( rocksdb_force_flush_memtable_and_lzero_now, rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE); -static MYSQL_THDVAR_BOOL( - flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG, - "Forces memtable flush on ANALZYE table to get accurate cardinality", - nullptr, nullptr, true); - static 
MYSQL_SYSVAR_UINT( seconds_between_stat_computes, rocksdb_seconds_between_stat_computes, PLUGIN_VAR_RQCMDARG, @@ -1465,6 +1503,13 @@ static MYSQL_SYSVAR_BOOL( "index prefix length is 767.", nullptr, nullptr, FALSE); +static MYSQL_SYSVAR_BOOL( + allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Allow server still to start successfully even if RocksDB corruption is " + "detected.", + nullptr, nullptr, FALSE); + static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { @@ -1490,7 +1535,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(skip_bloom_filter_on_read), MYSQL_SYSVAR(create_if_missing), - MYSQL_SYSVAR(concurrent_prepare), + MYSQL_SYSVAR(two_write_queues), MYSQL_SYSVAR(manual_wal_flush), MYSQL_SYSVAR(create_missing_column_families), MYSQL_SYSVAR(error_if_exists), @@ -1572,6 +1617,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(signal_drop_index_thread), MYSQL_SYSVAR(pause_background_work), MYSQL_SYSVAR(enable_2pc), + MYSQL_SYSVAR(ignore_unknown_options), MYSQL_SYSVAR(strict_collation_check), MYSQL_SYSVAR(strict_collation_exceptions), MYSQL_SYSVAR(collect_sst_properties), @@ -1585,7 +1631,6 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_ttl_ignore_pk), MYSQL_SYSVAR(reset_stats), MYSQL_SYSVAR(io_write_timeout), - MYSQL_SYSVAR(flush_memtable_on_analyze), MYSQL_SYSVAR(seconds_between_stat_computes), MYSQL_SYSVAR(compaction_sequential_deletes), @@ -1606,6 +1651,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(table_stats_sampling_pct), MYSQL_SYSVAR(large_prefix), + MYSQL_SYSVAR(allow_to_start_after_corruption), nullptr}; static rocksdb::WriteOptions @@ -1712,6 +1758,7 @@ protected: ulonglong m_update_count = 0; ulonglong m_delete_count = 0; ulonglong m_lock_count = 0; + std::unordered_map 
m_auto_incr_map; bool m_is_delayed_snapshot = false; bool m_is_two_phase = false; @@ -1744,7 +1791,30 @@ protected: get_iterator(const rocksdb::ReadOptions &options, rocksdb::ColumnFamilyHandle *column_family) = 0; -public: + /* + @detail + This function takes in the WriteBatch of the transaction to add + all the AUTO_INCREMENT merges. It does so by iterating through + m_auto_incr_map and then constructing key/value pairs to call merge upon. + + @param wb + */ + rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) { + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK();); + + // Iterate through the merge map merging all keys into data dictionary. + rocksdb::Status s; + for (auto &it : m_auto_incr_map) { + s = dict_manager.put_auto_incr_val(wb, it.first, it.second); + if (!s.ok()) { + return s; + } + } + m_auto_incr_map.clear(); + return s; + } + + public: const char *m_mysql_log_file_name; my_off_t m_mysql_log_offset; const char *m_mysql_gtid; @@ -1804,6 +1874,7 @@ public: m_detailed_error.copy(timeout_message( "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); table_handler->m_lock_wait_timeout_counter.inc(); + rocksdb_row_lock_wait_timeouts++; return HA_ERR_LOCK_WAIT_TIMEOUT; } @@ -1813,6 +1884,7 @@ public: false /* just statement */); m_detailed_error = String(); table_handler->m_deadlock_counter.inc(); + rocksdb_row_lock_deadlocks++; return HA_ERR_LOCK_DEADLOCK; } else if (s.IsBusy()) { rocksdb_snapshot_conflict_errors++; @@ -1967,28 +2039,110 @@ public: bool has_snapshot() const { return m_read_opts.snapshot != nullptr; } private: - // The tables we are currently loading. In a partitioned table this can - // have more than one entry - std::vector m_curr_bulk_load; + // The Rdb_sst_info structures we are currently loading. 
In a partitioned + // table this can have more than one entry + std::vector> m_curr_bulk_load; + std::string m_curr_bulk_load_tablename; + + /* External merge sorts for bulk load: key ID -> merge sort instance */ + std::unordered_map m_key_merge; public: - int finish_bulk_load() { - int rc = 0; + int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf, + Rdb_index_merge **key_merge) { + int res; + auto it = m_key_merge.find(kd_gl_id); + if (it == m_key_merge.end()) { + m_key_merge.emplace( + std::piecewise_construct, std::make_tuple(kd_gl_id), + std::make_tuple( + get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size), + THDVAR(get_thd(), merge_combine_read_size), + THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf)); + it = m_key_merge.find(kd_gl_id); + if ((res = it->second.init()) != 0) { + return res; + } + } + *key_merge = &it->second; + return HA_EXIT_SUCCESS; + } - std::vector::iterator it; - while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) { - int rc2 = (*it)->finalize_bulk_load(); + int finish_bulk_load(int print_client_error = true) { + int rc = 0, rc2; + + std::vector>::iterator it; + for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) { + rc2 = (*it)->commit(print_client_error); if (rc2 != 0 && rc == 0) { rc = rc2; } } - + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); DBUG_ASSERT(m_curr_bulk_load.size() == 0); + // Flush the index_merge sort buffers + if (!m_key_merge.empty()) { + rocksdb::Slice merge_key; + rocksdb::Slice merge_val; + for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { + GL_INDEX_ID index_id = it->first; + std::shared_ptr keydef = + ddl_manager.safe_find(index_id); + std::string table_name = ddl_manager.safe_get_table_name(index_id); + + // Unable to find key definition or table name since the + // table could have been dropped. + // TODO(herman): there is a race here between dropping the table + // and detecting a drop here. 
If the table is dropped while bulk + // loading is finishing, these keys being added here may + // be missed by the compaction filter and not be marked for + // removal. It is unclear how to lock the sql table from the storage + // engine to prevent modifications to it while bulk load is occurring. + if (keydef == nullptr || table_name.empty()) { + rc2 = HA_ERR_ROCKSDB_BULK_LOAD; + break; + } + const std::string &index_name = keydef->get_name(); + Rdb_index_merge &rdb_merge = it->second; + + // Rdb_sst_info expects a denormalized table name in the form of + // "./database/table" + std::replace(table_name.begin(), table_name.end(), '.', '/'); + table_name = "./" + table_name; + Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(), + *rocksdb_db_options, + THDVAR(get_thd(), trace_sst_api)); + + while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) { + if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) { + break; + } + } + + // rc2 == -1 => finished ok; rc2 > 0 => error + if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) { + if (rc == 0) { + rc = rc2; + } + break; + } + } + m_key_merge.clear(); + + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this + point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + } return rc; } - void start_bulk_load(ha_rocksdb *const bulk_load) { + int start_bulk_load(ha_rocksdb *const bulk_load, + std::shared_ptr sst_info) { /* If we already have an open bulk load of a table and the name doesn't match the current one, close out the currently running one. 
This allows @@ -1998,29 +2152,46 @@ public: DBUG_ASSERT(bulk_load != nullptr); if (!m_curr_bulk_load.empty() && - !bulk_load->same_table(*m_curr_bulk_load[0])) { + bulk_load->get_table_basename() != m_curr_bulk_load_tablename) { const auto res = finish_bulk_load(); - SHIP_ASSERT(res == 0); - } - - m_curr_bulk_load.push_back(bulk_load); - } - - void end_bulk_load(ha_rocksdb *const bulk_load) { - for (auto it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); - it++) { - if (*it == bulk_load) { - m_curr_bulk_load.erase(it); - return; + if (res != HA_EXIT_SUCCESS) { + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); + return res; } } - // Should not reach here - SHIP_ASSERT(0); + /* + This used to track ha_rocksdb handler objects, but those can be + freed by the table cache while this was referencing them. Instead + of tracking ha_rocksdb handler objects, this now tracks the + Rdb_sst_info allocated, and both the ha_rocksdb handler and the + Rdb_transaction both have shared pointers to them. + + On transaction complete, it will commit each Rdb_sst_info structure found. + If the ha_rocksdb object is freed, etc., it will also commit + the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent. + */ + m_curr_bulk_load.push_back(sst_info); + m_curr_bulk_load_tablename = bulk_load->get_table_basename(); + return HA_EXIT_SUCCESS; } int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); } + const char *get_rocksdb_tmpdir() const { + const char *tmp_dir = THDVAR(get_thd(), tmpdir); + + /* + We want to treat an empty string as nullptr, in these cases DDL operations + will use the default --tmpdir passed to mysql instead. + */ + if (tmp_dir != nullptr && *tmp_dir == '\0') { + tmp_dir = nullptr; + } + return (tmp_dir); + } + /* Flush the data accumulated so far. This assumes we're doing a bulk insert. 
@@ -2047,6 +2218,20 @@ public: return false; } + void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) { + m_auto_incr_map[gl_index_id] = + std::max(m_auto_incr_map[gl_index_id], curr_id); + } + +#ifndef NDEBUG + ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) { + if (m_auto_incr_map.count(gl_index_id) > 0) { + return m_auto_incr_map[gl_index_id]; + } + return 0; + } +#endif + virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, const rocksdb::Slice &value) = 0; @@ -2070,15 +2255,17 @@ public: virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *value) const = 0; + rocksdb::PinnableSlice *const value) const = 0; virtual rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) = 0; rocksdb::Iterator * get_iterator(rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter, bool fill_cache, + const rocksdb::Slice &eq_cond_lower_bound, + const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false, bool create_snapshot = true) { // Make sure we are not doing both read_current (which implies we don't // want a snapshot) and create_snapshot which makes sure we create @@ -2093,6 +2280,8 @@ public: if (skip_bloom_filter) { options.total_order_seek = true; + options.iterate_lower_bound = &eq_cond_lower_bound; + options.iterate_upper_bound = &eq_cond_upper_bound; } else { // With this option, Iterator::Valid() returns false if key // is outside of the prefix bloom filter range set at Seek(). 
@@ -2205,6 +2394,12 @@ private: return false; } + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + return false; + } + s = m_rocksdb_tx->Prepare(); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); @@ -2215,13 +2410,24 @@ private: bool commit_no_binlog() override { bool res = false; - release_snapshot(); - const rocksdb::Status s = m_rocksdb_tx->Commit(); + rocksdb::Status s; + + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); res = true; + goto error; } + release_snapshot(); + s = m_rocksdb_tx->Commit(); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + +error: /* Save the transaction object to be reused */ release_tx(); @@ -2242,6 +2448,7 @@ public: m_update_count = 0; m_delete_count = 0; m_lock_count = 0; + m_auto_incr_map.clear(); m_ddl_transaction = false; if (m_rocksdb_tx) { release_snapshot(); @@ -2343,18 +2550,25 @@ public: rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *value) const override { + rocksdb::PinnableSlice *const value) const override { + // clean PinnableSlice right begfore Get() for multiple gets per statement + // the resources after the last Get in a statement are cleared in + // handler::reset call + value->Reset(); global_stats.queries[QUERIES_POINT].inc(); return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); } rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) override { if (++m_lock_count > m_max_row_locks) return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + if (value != nullptr) { + value->Reset(); + } return 
m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, exclusive); } @@ -2480,13 +2694,24 @@ private: bool commit_no_binlog() override { bool res = false; - release_snapshot(); - const rocksdb::Status s = - rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch()); + rocksdb::Status s; + + s = merge_auto_incr_map(m_batch->GetWriteBatch()); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); res = true; + goto error; } + + release_snapshot(); + + s = rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } +error: reset(); m_write_count = 0; @@ -2575,14 +2800,15 @@ public: rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *const value) const override { + rocksdb::PinnableSlice *const value) const override { + value->Reset(); return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key, value); } rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) override { return get(column_family, key, value); } @@ -2709,13 +2935,12 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { Rdb_transaction *&tx = get_tx_from_thd(thd); if (tx != nullptr) { - int rc = tx->finish_bulk_load(); + int rc = tx->finish_bulk_load(false); if (rc != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Error %d finalizing last SST file while " "disconnecting", rc); - abort_with_stack_traces(); } delete tx; @@ -2763,7 +2988,8 @@ static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)), /* target_lsn is set to 0 when MySQL wants to sync the wal files */ - if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + if 
((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) || + rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { rocksdb_wal_group_syncs++; s = rdb->FlushWAL(target_lsn == 0 || rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); @@ -3134,79 +3360,54 @@ private: "=========================================\n"; } - static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn, - const GL_INDEX_ID &gl_index_id, - bool is_last_path = false) { - std::string txn_data; + static Rdb_deadlock_info::Rdb_dl_trx_info + get_dl_txn_info(const rocksdb::DeadlockInfo &txn, + const GL_INDEX_ID &gl_index_id) { + Rdb_deadlock_info::Rdb_dl_trx_info txn_data; - /* extract table name and index names using the index id */ - std::string table_name = ddl_manager.safe_get_table_name(gl_index_id); - if (table_name.empty()) { - table_name = + txn_data.trx_id = txn.m_txn_id; + + txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id); + if (txn_data.table_name.empty()) { + txn_data.table_name = "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); } + auto kd = ddl_manager.safe_find(gl_index_id); - std::string idx_name = + txn_data.index_name = (kd) ? kd->get_name() : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); - /* get the name of the column family */ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id); - std::string cf_name = cfh->GetName(); + txn_data.cf_name = cfh->GetName(); + + txn_data.waiting_key = + rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()); + + txn_data.exclusive_lock = txn.m_exclusive; - txn_data += format_string( - "TRANSACTIONID: %u\n" - "COLUMN FAMILY NAME: %s\n" - "WAITING KEY: %s\n" - "LOCK TYPE: %s\n" - "INDEX NAME: %s\n" - "TABLE NAME: %s\n", - txn.m_txn_id, cf_name.c_str(), - rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()) - .c_str(), - txn.m_exclusive ? 
"EXCLUSIVE" : "SHARED", idx_name.c_str(), - table_name.c_str()); - if (!is_last_path) { - txn_data += "---------------WAITING FOR---------------\n"; - } return txn_data; } - static std::string - get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) { - std::string path_data; - if (path_entry.limit_exceeded) { - path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; - } else { - path_data += "\n*** DEADLOCK PATH\n" - "=========================================\n"; - for (auto it = path_entry.path.begin(); it != path_entry.path.end(); + static Rdb_deadlock_info + get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) { + Rdb_deadlock_info deadlock_info; + + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) { - auto txn = *it; - const GL_INDEX_ID gl_index_id = { - txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( - txn.m_waiting_key.c_str()))}; - path_data += get_dlock_txn_info(txn, gl_index_id); - } - - DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); - /* print the first txn in the path to display the full deadlock cycle */ - if (!path_entry.path.empty() && !path_entry.limit_exceeded) { - auto txn = path_entry.path[0]; - const GL_INDEX_ID gl_index_id = { - txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( - txn.m_waiting_key.c_str()))}; - path_data += get_dlock_txn_info(txn, gl_index_id, true); - - /* prints the txn id of the transaction that caused the deadlock */ - auto deadlocking_txn = *(path_entry.path.end() - 1); - path_data += - format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n", - deadlocking_txn.m_txn_id); - } + auto txn = *it; + const GL_INDEX_ID gl_index_id = { + txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( + txn.m_waiting_key.c_str()))}; + deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id)); } - - return path_data; + DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); + /* print the first txn in the path to display the full deadlock 
cycle */ + if (!path_entry.path.empty() && !path_entry.limit_exceeded) { + auto deadlocking_txn = *(path_entry.path.end() - 1); + deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id; + } + return deadlock_info; } public: @@ -3244,9 +3445,48 @@ private: m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; for (auto path_entry : dlock_buffer) { - m_data += get_dlock_path_info(path_entry); + std::string path_data; + if (path_entry.limit_exceeded) { + path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; + } else { + path_data += "\n*** DEADLOCK PATH\n" + "=========================================\n"; + const auto dl_info = get_dl_path_trx_info(path_entry); + for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) { + const auto trx_info = *it; + path_data += format_string( + "TRANSACTION ID: %u\n" + "COLUMN FAMILY NAME: %s\n" + "WAITING KEY: %s\n" + "LOCK TYPE: %s\n" + "INDEX NAME: %s\n" + "TABLE NAME: %s\n", + trx_info.trx_id, trx_info.cf_name.c_str(), + trx_info.waiting_key.c_str(), + trx_info.exclusive_lock ? 
"EXCLUSIVE" : "SHARED", + trx_info.index_name.c_str(), trx_info.table_name.c_str()); + if (it != dl_info.path.end() - 1) { + path_data += "---------------WAITING FOR---------------\n"; + } + } + path_data += + format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", + dl_info.victim_trx_id); + } + m_data += path_data; } } + + std::vector get_deadlock_info() { + std::vector deadlock_info; + auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + for (auto path_entry : dlock_buffer) { + if (!path_entry.limit_exceeded) { + deadlock_info.push_back(get_dl_path_trx_info(path_entry)); + } + } + return deadlock_info; + } }; /** @@ -3337,6 +3577,16 @@ std::vector rdb_get_all_trx_info() { return trx_info; } +/* + returns a vector of info of recent deadlocks + for use by information_schema.rocksdb_deadlock +*/ +std::vector rdb_get_deadlock_info() { + Rdb_snapshot_status showStatus; + Rdb_transaction::walk_tx_list(&showStatus); + return showStatus.get_deadlock_info(); +} + /* Generate the snapshot status table */ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd, stat_print_fn *const stat_print) { @@ -3641,6 +3891,7 @@ static void rocksdb_update_table_stats( comp_stats_t comp_stats; uint lock_wait_timeout_stats; uint deadlock_stats; + uint lock_wait_stats; std::vector tablenames; /* @@ -3687,6 +3938,9 @@ static void rocksdb_update_table_stats( io_perf_write.requests = table_handler->m_io_perf_write.requests.load(); lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load(); deadlock_stats = table_handler->m_deadlock_counter.load(); + lock_wait_stats = + table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT] + .load(); /* Convert from rocksdb timer to mysql timer. 
RocksDB values are @@ -3714,7 +3968,7 @@ static void rocksdb_update_table_stats( sizeof(tablename_sys)); (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats, - &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats, + &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats, rocksdb_hton_name); } } @@ -3726,8 +3980,9 @@ static rocksdb::Status check_rocksdb_options_compatibility( rocksdb::DBOptions loaded_db_opt; std::vector loaded_cf_descs; - rocksdb::Status status = LoadLatestOptions(dbpath, rocksdb::Env::Default(), - &loaded_db_opt, &loaded_cf_descs); + rocksdb::Status status = + LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt, + &loaded_cf_descs, rocksdb_ignore_unknown_options); // If we're starting from scratch and there are no options saved yet then this // is a valid case. Therefore we can't compare the current set of options to @@ -3766,7 +4021,8 @@ static rocksdb::Status check_rocksdb_options_compatibility( // This is the essence of the function - determine if it's safe to open the // database or not. status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts, - loaded_cf_descs); + loaded_cf_descs, + rocksdb_ignore_unknown_options); return status; } @@ -3778,6 +4034,22 @@ static rocksdb::Status check_rocksdb_options_compatibility( static int rocksdb_init_func(void *const p) { DBUG_ENTER_FUNC(); + if (rdb_check_rocksdb_corruption()) { + sql_print_error("RocksDB: There was a corruption detected in RockDB files. " + "Check error log emitted earlier for more details."); + if (rocksdb_allow_to_start_after_corruption) { + sql_print_information( + "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent " + "server operating if RocksDB corruption is detected."); + } else { + sql_print_error("RocksDB: The server will exit normally and stop restart " + "attempts. 
Remove %s file from data directory and " + "start mysqld manually.", + rdb_corruption_marker_file_name().c_str()); + exit(0); + } + } + // Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN. static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes."); @@ -3836,6 +4108,16 @@ static int rocksdb_init_func(void *const p) { DBUG_ASSERT(!mysqld_embedded); + if (rocksdb_db_options->max_open_files > (long)open_files_limit) { + sql_print_information("RocksDB: rocksdb_max_open_files should not be " + "greater than the open_files_limit, effective value " + "of rocksdb_max_open_files is being set to " + "open_files_limit / 2."); + rocksdb_db_options->max_open_files = open_files_limit / 2; + } else if (rocksdb_db_options->max_open_files == -2) { + rocksdb_db_options->max_open_files = open_files_limit / 2; + } + rocksdb_stats = rocksdb::CreateDBStatistics(); rocksdb_db_options->statistics = rocksdb_stats; @@ -3886,14 +4168,20 @@ static int rocksdb_init_func(void *const p) { DBUG_RETURN(HA_EXIT_FAILURE); } + if (rocksdb_db_options->allow_mmap_writes && + rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " + "to use allow_mmap_writes"); + DBUG_RETURN(HA_EXIT_FAILURE); + } + // sst_file_manager will move deleted rocksdb sst files to trash_dir // to be deleted in a background thread. 
std::string trash_dir = std::string(rocksdb_datadir) + "/trash"; - rocksdb_db_options->sst_file_manager.reset( - NewSstFileManager(rocksdb_db_options->env, myrocks_logger, trash_dir)); - - rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond( - rocksdb_sst_mgr_rate_bytes_per_sec); + rocksdb_db_options->sst_file_manager.reset(NewSstFileManager( + rocksdb_db_options->env, myrocks_logger, trash_dir, + rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */)); std::vector cf_names; rocksdb::Status status; @@ -3963,9 +4251,15 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_persistent_cache_size_mb > 0) { std::shared_ptr pcache; uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024; - rocksdb::NewPersistentCache( + status = rocksdb::NewPersistentCache( rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path), cache_size_bytes, myrocks_logger, true, &pcache); + if (!status.ok()) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Persistent cache returned error: (%s)", + status.getState()); + DBUG_RETURN(HA_EXIT_FAILURE); + } rocksdb_tbl_options->persistent_cache = pcache; } else if (strlen(rocksdb_persistent_cache_path)) { sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb"); @@ -4274,6 +4568,33 @@ static inline void rocksdb_smart_next(bool seek_backward, } } +#ifndef NDEBUG +// simulate that RocksDB has reported corrupted data +static void dbug_change_status_to_corrupted(rocksdb::Status *status) { + *status = rocksdb::Status::Corruption(); +} +#endif + +// If the iterator is not valid it might be because of EOF but might be due +// to IOError or corruption. The good practice is always check it. 
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling +static inline bool is_valid(rocksdb::Iterator *scan_it) { + if (scan_it->Valid()) { + return true; + } else { + rocksdb::Status s = scan_it->status(); + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + if (s.IsIOError() || s.IsCorruption()) { + if (s.IsCorruption()) { + rdb_persist_corruption_marker(); + } + rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); + } + return false; + } +} + /** @brief Example of simple lock controls. The "table_handler" it creates is a @@ -4351,28 +4672,124 @@ std::vector Rdb_open_tables_map::get_table_names(void) const { return names; } +/* + Inspired by innobase_get_int_col_max_value from InnoDB. This returns the + maximum value a type can take on. +*/ +static ulonglong rdb_get_int_col_max_value(const Field *field) { + ulonglong max_value = 0; + switch (field->key_type()) { + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + max_value = 0x7FFFULL; + break; + case HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + default: + abort(); + } + + return max_value; +} + void ha_rocksdb::load_auto_incr_value() { + ulonglong auto_incr = 0; + bool validate_last = false, use_datadic = true; +#ifndef NDEBUG + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;); + validate_last = true; +#endif + + if (use_datadic && dict_manager.get_auto_incr_val( + m_tbl_def->get_autoincr_gl_index_id(), &auto_incr)) { + 
update_auto_incr_val(auto_incr); + } + + // If we find nothing in the data dictionary, or if we are in debug mode, + // then call index_last to get the last value. + // + // This is needed when upgrading from a server that did not support + // persistent auto_increment, of if the table is empty. + // + // For debug mode, we are just verifying that the data dictionary value is + // greater than or equal to the maximum value in the table. + if (auto_incr == 0 || validate_last) { + auto_incr = load_auto_incr_value_from_index(); + update_auto_incr_val(auto_incr); + } + + // If we failed to find anything from the data dictionary and index, then + // initialize auto_increment to 1. + if (m_tbl_def->m_auto_incr_val == 0) { + update_auto_incr_val(1); + } +} + +ulonglong ha_rocksdb::load_auto_incr_value_from_index() { const int save_active_index = active_index; active_index = table->s->next_number_index; const uint8 save_table_status = table->status; + ulonglong last_val = 0; - /* - load_auto_incr_value() may be called by statements that - do not execute implicit commits (i.e. SHOW CREATE TABLE). - index_last() creates a snapshot. When a snapshot is created - here, it has to be released as well. (GitHub issue#189) - */ Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); + if (is_new_snapshot) { + tx->acquire_snapshot(true); + } // Do a lookup. We only need index column, so it should be index-only. 
- // (another reason to make it index-only is that table->read_set is - // not set appropriately and non-index-only lookup will not read the value) + // (another reason to make it index-only is that table->read_set is not set + // appropriately and non-index-only lookup will not read the value) const bool save_keyread_only = m_keyread_only; m_keyread_only = true; + m_key_requested = true; - if (!index_last(table->record[0])) - update_auto_incr_val(); + if (!index_last(table->record[0])) { + Field *field = + table->key_info[table->s->next_number_index].key_part[0].field; + ulonglong max_val = rdb_get_int_col_max_value(field); + my_bitmap_map *const old_map = + dbug_tmp_use_all_columns(table, table->read_set); + last_val = field->val_int(); + if (last_val != max_val) { + last_val++; + } +#ifndef NDEBUG + ulonglong dd_val; + if (last_val <= max_val) { + const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id(); + if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) && + tx->get_auto_incr(gl_index_id) == 0) { + DBUG_ASSERT(dd_val >= last_val); + } + } +#endif + dbug_tmp_restore_column_map(table->read_set, old_map); + } m_keyread_only = save_keyread_only; if (is_new_snapshot) { @@ -4385,38 +4802,46 @@ void ha_rocksdb::load_auto_incr_value() { /* Do what ha_rocksdb::index_end() does. (Why don't we use index_init/index_end? class handler defines index_init - as private, for some reason). - */ + as private, for some reason). + */ release_scan_iterator(); + + return last_val; } -/* Get PK value from table->record[0]. */ -/* - TODO(alexyang): No existing support for auto_increment on non-pk columns, see - end of ha_rocksdb::create. 
Also see opened issue here: - https://github.com/facebook/mysql-5.6/issues/153 -*/ -void ha_rocksdb::update_auto_incr_val() { +void ha_rocksdb::update_auto_incr_val(ulonglong val) { + ulonglong auto_incr_val = m_tbl_def->m_auto_incr_val; + while ( + auto_incr_val < val && + !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, val)) { + // Do nothing - just loop until auto_incr_val is >= val or we successfully + // set it + } +} + +void ha_rocksdb::update_auto_incr_val_from_field() { Field *field; - longlong new_val; + ulonglong new_val, max_val; field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); my_bitmap_map *const old_map = dbug_tmp_use_all_columns(table, table->read_set); new_val = field->val_int(); // don't increment if we would wrap around - if (new_val != std::numeric_limits::max()) { + if (new_val != max_val) { new_val++; } dbug_tmp_restore_column_map(table->read_set, old_map); - longlong auto_incr_val = m_tbl_def->m_auto_incr_val; - while (auto_incr_val < new_val && - !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, - new_val)) { - // Do nothing - just loop until auto_incr_val is >= new_val or - // we successfully set it + // Only update if positive value was set for auto_incr column. + if (new_val <= max_val) { + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val); + + // Update the in memory auto_incr value in m_tbl_def. + update_auto_incr_val(new_val); } } @@ -4428,12 +4853,12 @@ int ha_rocksdb::load_hidden_pk_value() { Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); + longlong hidden_pk_id = 1; // Do a lookup. 
if (!index_last(table->record[0])) { /* Decode PK field from the key */ - longlong hidden_pk_id = 0; auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id); if (err) { if (is_new_snapshot) { @@ -4443,11 +4868,11 @@ int ha_rocksdb::load_hidden_pk_value() { } hidden_pk_id++; - longlong old = m_tbl_def->m_hidden_pk_val; - while ( - old < hidden_pk_id && - !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { - } + } + + longlong old = m_tbl_def->m_hidden_pk_val; + while (old < hidden_pk_id && + !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { } if (is_new_snapshot) { @@ -4534,14 +4959,11 @@ ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr), m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr), - m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr), - m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), - m_bulk_load_tx(nullptr), m_encoder_arr(nullptr), + m_dup_sk_packed_tuple_old(nullptr), m_eq_cond_lower_bound(nullptr), + m_eq_cond_upper_bound(nullptr), m_pack_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr), m_row_checksums_checked(0), m_in_rpl_delete_rows(false), - m_in_rpl_update_rows(false), m_force_skip_unique_check(false) { - // TODO(alexyang): create a valid PSI_mutex_key for this mutex - mysql_mutex_init(0, &m_bulk_load_mutex, MY_MUTEX_INIT_FAST); -} + m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {} static const char *ha_rocksdb_exts[] = {NullS}; @@ -4551,8 +4973,8 @@ const char **ha_rocksdb::bas_ext() const { DBUG_RETURN(ha_rocksdb_exts); } -bool ha_rocksdb::same_table(const ha_rocksdb &other) const { - return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename(); +const std::string &ha_rocksdb::get_table_basename() const { + return m_tbl_def->base_tablename(); } /** @@ -4638,8 +5060,12 @@ bool 
ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, #ifndef NDEBUG read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); #endif - return ts + kd.m_ttl_duration + read_filter_ts <= - static_cast(curr_ts); + bool is_hide_ttl = + ts + kd.m_ttl_duration + read_filter_ts <= static_cast(curr_ts); + if (is_hide_ttl) { + update_row_stats(ROWS_FILTERED); + } + return is_hide_ttl; } void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, @@ -4853,12 +5279,12 @@ int ha_rocksdb::convert_record_to_storage_format( Setup which fields will be unpacked when reading rows @detail - Two special cases when we still unpack all fields: + Three special cases when we still unpack all fields: - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all - fields to find whether there is a row checksum at the end. We could skip - the fields instead of decoding them, but currently we do decoding.) + read all fields to find whether there is a row checksum at the end. We could + skip the fields instead of decoding them, but currently we do decoding.) + - On index merge as bitmap is cleared during that operation @seealso ha_rocksdb::setup_field_converters() @@ -4866,20 +5292,29 @@ int ha_rocksdb::convert_record_to_storage_format( */ void ha_rocksdb::setup_read_decoders() { m_decoders_vect.clear(); + m_key_requested = false; int last_useful = 0; int skip_size = 0; for (uint i = 0; i < table->s->fields; i++) { + // bitmap is cleared on index merge, but it still needs to decode columns + const bool field_requested = + m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || + bitmap_is_clear_all(table->read_set) || + bitmap_is_set(table->read_set, table->field[i]->field_index); + // We only need the decoder if the whole record is stored. 
if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { + // the field potentially needs unpacking + if (field_requested) { + // the field is in the read set + m_key_requested = true; + } continue; } - // bitmap is cleared on index merge, but it still needs to decode columns - if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_clear_all(table->read_set) || - bitmap_is_set(table->read_set, table->field[i]->field_index)) { + if (field_requested) { // We will need to decode this field m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); last_useful = m_decoders_vect.size(); @@ -4905,13 +5340,18 @@ void ha_rocksdb::setup_read_decoders() { } #ifndef NDEBUG -void dbug_append_garbage_at_end(std::string &on_disk_rec) { - on_disk_rec.append("abc"); +void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) { + std::string str(on_disk_rec->data(), on_disk_rec->size()); + on_disk_rec->Reset(); + str.append("abc"); + on_disk_rec->PinSelf(rocksdb::Slice(str)); } -void dbug_truncate_record(std::string &on_disk_rec) { on_disk_rec.resize(0); } +void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) { + on_disk_rec->remove_suffix(on_disk_rec->size()); +} -void dbug_modify_rec_varchar12(std::string &on_disk_rec) { +void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) { std::string res; // The record is NULL-byte followed by VARCHAR(10). 
// Put the NULL-byte @@ -4920,7 +5360,8 @@ void dbug_modify_rec_varchar12(std::string &on_disk_rec) { res.append("\xC", 1); res.append("123456789ab", 12); - on_disk_rec.assign(res); + on_disk_rec->Reset(); + on_disk_rec->PinSelf(rocksdb::Slice(res)); } void dbug_modify_key_varchar8(String &on_disk_rec) { @@ -4942,16 +5383,15 @@ void dbug_create_err_inplace_alter() { int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, uchar *const buf) { - DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", - dbug_append_garbage_at_end(m_retrieved_record);); - DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", - dbug_truncate_record(m_retrieved_record);); - DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3", - dbug_modify_rec_varchar12(m_retrieved_record);); - const rocksdb::Slice retrieved_rec_slice(&m_retrieved_record.front(), - m_retrieved_record.size()); - return convert_record_from_storage_format(key, &retrieved_rec_slice, buf); + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", + dbug_append_garbage_at_end(&m_retrieved_record);); + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", + dbug_truncate_record(&m_retrieved_record);); + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3", + dbug_modify_rec_varchar12(&m_retrieved_record);); + + return convert_record_from_storage_format(key, &m_retrieved_record, buf); } int ha_rocksdb::convert_blob_from_storage_format( @@ -5115,9 +5555,13 @@ int ha_rocksdb::convert_record_from_storage_format( Rdb_key_def::get_unpack_header_size(unpack_info[0])); } - int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, - unpack_info ? &unpack_slice : nullptr, - false /* verify_checksum */); + int err = HA_EXIT_SUCCESS; + if (m_key_requested) { + err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, + unpack_info ? 
&unpack_slice : nullptr, + false /* verify_checksum */); + } + if (err != HA_EXIT_SUCCESS) { return err; } @@ -5361,6 +5805,11 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, m_pack_buffer = reinterpret_cast(my_malloc(max_packed_sk_len, MYF(0))); + m_eq_cond_upper_bound = + reinterpret_cast(my_malloc(max_packed_sk_len, MYF(0))); + m_eq_cond_lower_bound = + reinterpret_cast(my_malloc(max_packed_sk_len, MYF(0))); + /* If inplace alter is happening, allocate special buffers for unique secondary index duplicate checking. @@ -5375,6 +5824,7 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr || m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr || m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr || + m_eq_cond_upper_bound == nullptr || m_eq_cond_lower_bound == nullptr || (alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr || m_dup_sk_packed_tuple_old == nullptr))) { // One or more of the above allocations failed. Clean up and exit @@ -5413,6 +5863,12 @@ void ha_rocksdb::free_key_buffers() { my_free(m_dup_sk_packed_tuple_old); m_dup_sk_packed_tuple_old = nullptr; + + my_free(m_eq_cond_upper_bound); + m_eq_cond_upper_bound = nullptr; + + my_free(m_eq_cond_lower_bound); + m_eq_cond_lower_bound = nullptr; } void ha_rocksdb::set_use_read_free_rpl(const char *const whitelist) { @@ -5531,11 +5987,13 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { */ m_verify_row_debug_checksums = false; - /* TODO: move the following to where TABLE_SHARE is opened: */ - if (table->found_next_number_field) + /* Load auto_increment value only once on first use. */ + if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) { load_auto_incr_value(); + } - if (has_hidden_pk(table) && + /* Load hidden pk only once on first use. 
*/ + if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 && (err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) { free_key_buffers(); DBUG_RETURN(err); @@ -6460,6 +6918,20 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; + if (create_info->auto_increment_value) { + bool autoinc_upgrade_test = false; + m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;); + if (!autoinc_upgrade_test) { + auto s = dict_manager.put_auto_incr_val( + batch, m_tbl_def->get_autoincr_gl_index_id(), + m_tbl_def->m_auto_incr_val); + if (!s.ok()) { + goto error; + } + } + } + dict_manager.lock(); err = ddl_manager.put_and_write(m_tbl_def, batch); if (err != HA_EXIT_SUCCESS) { @@ -6475,23 +6947,6 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, dict_manager.unlock(); - if (create_info->auto_increment_value) - m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; - - /* - We only support auto_increment at start of the PRIMARY KEY. - */ - // Field *field; - // if ((field= table_arg->next_number_field)) - /* TODO mdcallag: disable this for now to let UNIQUE indexes kind of work - if ((field= table_arg->found_next_number_field)) - { - int pk= table_arg->s->primary_key; - Field *pk_field= table_arg->key_info[pk].key_part[0].field; - if (field->field_index != pk_field->field_index) - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - */ DBUG_RETURN(HA_EXIT_SUCCESS); error: @@ -6589,7 +7044,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, */ rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice); - while (m_scan_it->Valid()) { + while (is_valid(m_scan_it)) { /* We are using full key and we've hit an exact match, or... @@ -6629,12 +7084,12 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, from the POV of the current transaction. If it has, try going to the next key. 
*/ - while (m_scan_it->Valid() && kd.has_ttl() && + while (is_valid(m_scan_it) && kd.has_ttl() && should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); } - return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; + return is_valid(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; } int ha_rocksdb::position_to_correct_key( @@ -6794,7 +7249,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, bool covered_lookup = m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap); - if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); if (pk_size == RDB_INVALID_KEY_LEN) { @@ -6895,7 +7350,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { /* Use STATUS_NOT_FOUND when record not found or some error occurred */ table->status = STATUS_NOT_FOUND; - if (m_scan_it->Valid()) { + if (is_valid(m_scan_it)) { rocksdb::Slice key = m_scan_it->key(); /* Check if we've ran out of records of this index */ @@ -6916,8 +7371,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { bool covered_lookup = m_keyread_only || m_key_descr_arr[keyno]->covers_lookup( table, &value, &m_lookup_bitmap); - if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && - !has_hidden_pk(table)) { + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { rc = m_key_descr_arr[keyno]->unpack_record( table, buf, &key, &value, m_verify_row_debug_checksums); global_stats.covered_secondary_key_lookups.inc(); @@ -7209,7 +7663,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, while (1) { rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); - if (!m_scan_it->Valid()) { + if (!is_valid(m_scan_it)) { table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; } @@ -7477,7 +7931,7 @@ void 
dbug_dump_database(rocksdb::DB *const db) { rocksdb::Status ha_rocksdb::get_for_update( Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value) const { + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const { DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); const bool exclusive = m_lock_rows != RDB_LOCK_READ; @@ -7561,6 +8015,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, &m_retrieved_record); } + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + if (!s.IsNotFound() && !s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, m_table_handler)); @@ -7571,9 +8028,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, if (found) { /* If we found the record, but it's expired, pretend we didn't find it. */ if (!skip_ttl_check && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, - rocksdb::Slice(&m_retrieved_record.front(), - m_retrieved_record.size()), + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, tx->m_snapshot_timestamp)) { DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); } @@ -7750,11 +8205,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) { - kd.get_supremum_key(key, &key_size); - } else { - kd.get_infimum_key(key, &key_size); - } + kd.get_first_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8333,23 +8784,21 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, The bloom filter may need to be disabled for this lookup. 
*/ - const bool total_order_seek = !can_use_bloom_filter( + const bool total_order_seek = !check_bloom_and_set_bounds( ha_thd(), kd, new_slice, all_parts_used); const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); - /* - psergey-todo: we just need to take lock, lookups not needed: - */ - std::string dummy_value; const rocksdb::Status s = - get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value); + get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr); if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler); } rocksdb::Iterator *const iter = row_info.tx->get_iterator( - kd.get_cf(), total_order_seek, fill_cache, true /* read current data */, + kd.get_cf(), total_order_seek, fill_cache, + m_eq_cond_lower_bound_slice, m_eq_cond_upper_bound_slice, + true /* read current data */, false /* acquire snapshot */); /* Need to scan the transaction to see if there is a duplicate key. @@ -8392,9 +8841,7 @@ int ha_rocksdb::check_uniqueness_and_lock( m_retrieved_record by check_and_lock_unique_pk(). */ if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, - rocksdb::Slice(&m_retrieved_record.front(), - m_retrieved_record.size()), + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, (row_info.tx->m_snapshot_timestamp ? row_info.tx->m_snapshot_timestamp : static_cast(std::time(nullptr))))) { @@ -8443,109 +8890,48 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, const rocksdb::Slice &key, const rocksdb::Slice &value, bool sort) { DBUG_ENTER_FUNC(); - + int res; rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); - DBUG_ASSERT(cf != nullptr); - int res = HA_EXIT_SUCCESS; + // In the case of unsorted inserts, m_sst_info allocated here is not + // used to store the keys. It is still used to indicate when tables + // are switched. 
+ if (m_sst_info == nullptr || m_sst_info->is_committed()) { + m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, + kd.get_name(), cf, *rocksdb_db_options, + THDVAR(ha_thd(), trace_sst_api))); + res = tx->start_bulk_load(this, m_sst_info); + if (res != HA_EXIT_SUCCESS) { + DBUG_RETURN(res); + } + } + DBUG_ASSERT(m_sst_info); if (sort) { - GL_INDEX_ID kd_gl_id = kd.get_gl_index_id(); - auto it = m_key_merge.find(kd_gl_id); - if (it == m_key_merge.end()) { - m_key_merge.emplace( - std::piecewise_construct, std::make_tuple(kd_gl_id), - std::make_tuple( - thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size), - THDVAR(ha_thd(), merge_combine_read_size), - THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf)); - it = m_key_merge.find(kd_gl_id); - if ((res = it->second.init()) != 0) { - DBUG_RETURN(res); - } + Rdb_index_merge *key_merge; + DBUG_ASSERT(cf != nullptr); - if (m_bulk_load_tx == nullptr) { - tx->start_bulk_load(this); - m_bulk_load_tx = tx; - } + res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge); + if (res == HA_EXIT_SUCCESS) { + res = key_merge->add(key, value); } - res = it->second.add(key, value); } else { - if (!m_sst_info) { - m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, - kd.get_name(), cf, *rocksdb_db_options, - THDVAR(ha_thd(), trace_sst_api))); - tx->start_bulk_load(this); - m_bulk_load_tx = tx; - } - - DBUG_ASSERT(m_sst_info); - res = m_sst_info->put(key, value); } DBUG_RETURN(res); } -int ha_rocksdb::finalize_bulk_load() { +int ha_rocksdb::finalize_bulk_load(bool print_client_error) { DBUG_ENTER_FUNC(); - DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info, - m_bulk_load_tx != nullptr); - - /* Skip if there are no possible ongoing bulk loads */ - if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) { - DBUG_RETURN(HA_EXIT_SUCCESS); - } - int res = HA_EXIT_SUCCESS; - RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex); - + /* Skip if there are no possible ongoing bulk loads */ if 
(m_sst_info) { - res = m_sst_info->commit(); + res = m_sst_info->commit(print_client_error); m_sst_info.reset(); } - - if (!m_key_merge.empty()) { - rocksdb::Slice merge_key; - rocksdb::Slice merge_val; - for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { - const std::string &index_name = - ddl_manager.safe_find(it->first)->get_name(); - Rdb_index_merge &rdb_merge = it->second; - Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name, - rdb_merge.get_cf(), *rocksdb_db_options, - THDVAR(ha_thd(), trace_sst_api)); - - while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) { - if ((res = sst_info.put(merge_key, merge_val)) != 0) { - break; - } - } - // res == -1 => finished ok; res > 0 => error - if (res <= 0) { - if ((res = sst_info.commit()) != 0) { - break; - } - } - } - m_key_merge.clear(); - - /* - Explicitly tell jemalloc to clean up any unused dirty pages at this point. - See https://reviews.facebook.net/D63723 for more details. - */ - purge_all_jemalloc_arenas(); - } - - if (m_bulk_load_tx != nullptr) { - m_bulk_load_tx->end_bulk_load(this); - m_bulk_load_tx = nullptr; - } - - RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex); - DBUG_RETURN(res); } @@ -8575,7 +8961,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, } if (table->next_number_field) { - update_auto_incr_val(); + update_auto_incr_val_from_field(); } int rc = HA_EXIT_SUCCESS; @@ -8806,6 +9192,42 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, DBUG_RETURN(HA_EXIT_SUCCESS); } +/* + Setting iterator upper/lower bounds for Seek/SeekForPrev. + This makes RocksDB to avoid scanning tombstones outside of + the given key ranges, when prefix_same_as_start=true was not passed + (when prefix bloom filter can not be used). + Inversing upper/lower bound is necessary on reverse order CF. + This covers HA_READ_PREFIX_LAST* case as well. 
For example, + if given query eq condition was 12 bytes and condition was + 0x0000b3eb003f65c5e78858b8, and if doing HA_READ_PREFIX_LAST, + eq_cond_len was 11 (see calc_eq_cond_len() for details). + If the index was reverse order, upper bound would be + 0x0000b3eb003f65c5e78857, and lower bound would be + 0x0000b3eb003f65c5e78859. These cover given eq condition range. +*/ +void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond) { + uint eq_cond_len = eq_cond.size(); + DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE); + memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); + kd.successor(m_eq_cond_upper_bound, eq_cond_len); + memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); + kd.predecessor(m_eq_cond_lower_bound, eq_cond_len); + + if (kd.m_is_reverse_cf) { + m_eq_cond_upper_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len); + m_eq_cond_lower_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len); + } else { + m_eq_cond_upper_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len); + m_eq_cond_lower_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len); + } +} + /* Open a cursor */ @@ -8822,7 +9244,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, bool skip_bloom = true; const rocksdb::Slice eq_cond(slice->data(), eq_cond_len); - if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys)) { + if (check_bloom_and_set_bounds(ha_thd(), kd, eq_cond, use_all_keys)) { skip_bloom = false; } @@ -8862,7 +9284,9 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, read_opts.snapshot = m_scan_it_snapshot; m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); } else { - m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache); + m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache, + m_eq_cond_lower_bound_slice, + m_eq_cond_upper_bound_slice); } 
m_scan_it_skips_bloom = skip_bloom; } @@ -8881,14 +9305,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - if (m_pk_descr->m_is_reverse_cf) - m_pk_descr->get_supremum_key(m_pk_packed_tuple, &key_size); - else - m_pk_descr->get_infimum_key(m_pk_packed_tuple, &key_size); + m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); - setup_scan_iterator(*m_pk_descr, &table_key); + setup_scan_iterator(*m_pk_descr, &table_key, false, + Rdb_key_def::INDEX_NUMBER_SIZE); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } @@ -8960,7 +9382,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { table->status = STATUS_NOT_FOUND; stats.rows_requested++; - if (!m_scan_it || !m_scan_it->Valid()) { + if (!m_scan_it || !is_valid(m_scan_it)) { /* We can get here when SQL layer has called @@ -8982,7 +9404,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { m_scan_it->Prev(); /* this call cannot fail */ } - if (!m_scan_it->Valid()) { + if (!is_valid(m_scan_it)) { rc = HA_ERR_END_OF_FILE; break; } @@ -9290,6 +9712,10 @@ int ha_rocksdb::info(uint flag) { uint64_t memtableCount; uint64_t memtableSize; + // the stats below are calculated from skiplist wich is a probablistic + // data structure, so the results vary between test runs + // it also can return 0 for quite a large tables which means that + // cardinality for memtable only indxes will be reported as 0 rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r, &memtableCount, &memtableSize); @@ -9323,7 +9749,6 @@ int ha_rocksdb::info(uint flag) { if (flag & HA_STATUS_CONST) { ref_length = m_pk_descr->max_storage_fmt_length(); - // TODO: Needs to reimplement after having real index statistics for (uint i = 0; i < m_tbl_def->m_key_count; i++) { if (is_hidden_pk(i, table, m_tbl_def)) { continue; @@ -9576,20 +10001,6 @@ void 
ha_rocksdb::read_thd_vars(THD *const thd) { m_checksums_pct = THDVAR(thd, checksums_pct); } -const char *ha_rocksdb::thd_rocksdb_tmpdir() { - const char *tmp_dir = THDVAR(ha_thd(), tmpdir); - - /* - We want to treat an empty string as nullptr, in these cases DDL operations - will use the default --tmpdir passed to mysql instead. - */ - if (tmp_dir != nullptr && *tmp_dir == '\0') { - tmp_dir = nullptr; - } - - return (tmp_dir); -} - /** @return HA_EXIT_SUCCESS OK @@ -9747,6 +10158,13 @@ ha_rocksdb::get_range(const int &i, return myrocks::get_range(*m_key_descr_arr[i], buf); } +/* + This function is called with total_order_seek=true, but + upper/lower bound setting is not necessary. + Boundary set is useful when there is no matching key, + but in drop_index_thread's case, it means index is marked as removed, + so no further seek will happen for the index id. +*/ static bool is_myrocks_index_empty( rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf, const rocksdb::ReadOptions &read_opts, @@ -9817,7 +10235,7 @@ void Rdb_drop_index_thread::run() { "from cf id %u. MyRocks data dictionary may " "get corrupted.", d.cf_id); - abort_with_stack_traces(); + abort(); } rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); DBUG_ASSERT(cfh); @@ -9906,6 +10324,7 @@ int ha_rocksdb::delete_table(const char *const tablename) { the persistent data dictionary). 
*/ ddl_manager.remove(tbl, batch, true); + int err = dict_manager.commit(batch); if (err) { DBUG_RETURN(err); @@ -9936,10 +10355,12 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { const Rdb_key_def &kd = *tbl->m_key_descr_arr[i]; kd.get_infimum_key(reinterpret_cast(key_buf), &key_len); rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); - + const rocksdb::Slice table_key(key_buf, key_len); + setup_iterator_bounds(kd, table_key); + opts.iterate_lower_bound = &m_eq_cond_lower_bound_slice; + opts.iterate_upper_bound = &m_eq_cond_upper_bound_slice; std::unique_ptr it(rdb->NewIterator(opts, cf)); - const rocksdb::Slice table_key(key_buf, key_len); it->Seek(table_key); while (it->Valid()) { const rocksdb::Slice key = it->key(); @@ -10018,6 +10439,7 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) { const std::unique_ptr wb = dict_manager.begin(); rocksdb::WriteBatch *const batch = wb.get(); dict_manager.lock(); + if (ddl_manager.rename(from_str, to_str, batch)) { rc = HA_ERR_NO_SUCH_TABLE; } else { @@ -10068,7 +10490,7 @@ int ha_rocksdb::extra(enum ha_extra_function operation) { If the table has blobs, then they are part of m_retrieved_record. This call invalidates them. 
*/ - m_retrieved_record.clear(); + m_retrieved_record.Reset(); break; default: break; @@ -10238,24 +10660,21 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, std::unordered_map> ranges; std::unordered_set ids_to_check; - std::unordered_map ids_to_keyparts; std::vector buf(table_arg->s->keys * 2 * Rdb_key_def::INDEX_NUMBER_SIZE); + std::unordered_map stats; for (uint i = 0; i < table_arg->s->keys; i++) { const auto bufp = &buf[i * 2 * Rdb_key_def::INDEX_NUMBER_SIZE]; const Rdb_key_def &kd = *m_key_descr_arr[i]; + const GL_INDEX_ID index_id = kd.get_gl_index_id(); ranges[kd.get_cf()].push_back(get_range(i, bufp)); - ids_to_check.insert(kd.get_gl_index_id()); - ids_to_keyparts[kd.get_gl_index_id()] = kd.get_key_parts(); - } - // for analyze statements, force flush on memtable to get accurate cardinality - Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); - if (thd != nullptr && THDVAR(thd, flush_memtable_on_analyze) && - !rocksdb_pause_background_work) { - for (auto it : ids_to_check) { - rdb->Flush(rocksdb::FlushOptions(), cf_manager.get_cf(it.cf_id)); - } + ids_to_check.insert(index_id); + // Initialize the stats to 0. If there are no files that contain + // this gl_index_id, then 0 should be stored for the cached stats. + stats[index_id] = Rdb_index_stats(index_id); + DBUG_ASSERT(kd.get_key_parts() > 0); + stats[index_id].m_distinct_keys_per_prefix.resize(kd.get_key_parts()); } // get RocksDB table properties for these ranges @@ -10272,15 +10691,6 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, } int num_sst = 0; - // group stats per index id - std::unordered_map stats; - for (const auto &it : ids_to_check) { - // Initialize the stats to 0. If there are no files that contain - // this gl_index_id, then 0 should be stored for the cached stats. 
- stats[it] = Rdb_index_stats(it); - DBUG_ASSERT(ids_to_keyparts.count(it) > 0); - stats[it].m_distinct_keys_per_prefix.resize(ids_to_keyparts[it]); - } for (const auto &it : props) { std::vector sst_stats; Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats); @@ -10307,6 +10717,53 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, num_sst++; } + // calculate memtable cardinality + Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct); + auto read_opts = rocksdb::ReadOptions(); + read_opts.read_tier = rocksdb::ReadTier::kMemtableTier; + for (uint i = 0; i < table_arg->s->keys; i++) { + const Rdb_key_def &kd = *m_key_descr_arr[i]; + Rdb_index_stats &stat = stats[kd.get_gl_index_id()]; + + uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto r = get_range(i, r_buf); + uint64_t memtableCount; + uint64_t memtableSize; + rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memtableCount, + &memtableSize); + if (memtableCount < (uint64_t)stat.m_rows / 10) { + // skip tables that already have enough stats from SST files to reduce + // overhead and avoid degradation of big tables stats by sampling from + // relatively tiny (less than 10% of full data set) memtable dataset + continue; + } + + std::unique_ptr it = std::unique_ptr( + rdb->NewIterator(read_opts, kd.get_cf())); + + uchar *first_key; + uint key_size; + if (is_pk(i, table, m_tbl_def)) { + first_key = m_pk_packed_tuple; + } else { + first_key = m_sk_packed_tuple; + } + kd.get_first_key(first_key, &key_size); + rocksdb::Slice first_index_key((const char *)first_key, key_size); + + cardinality_collector.Reset(); + for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) { + const rocksdb::Slice key = it->key(); + if (!kd.covers_key(key)) { + break; // end of this index + } + stat.m_rows++; + + cardinality_collector.ProcessKey(key, &kd, &stat); + } + cardinality_collector.AdjustStats(&stat); + } + // set and persist new stats 
ddl_manager.set_stats(stats); ddl_manager.persist_stats(true); @@ -10354,32 +10811,73 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, off = 1; } - longlong new_val; + Field *field; + ulonglong new_val, max_val; + field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); // Local variable reference to simplify code below - std::atomic &auto_incr = m_tbl_def->m_auto_incr_val; + auto &auto_incr = m_tbl_def->m_auto_incr_val; - if (inc == 1 && off == 1) { + if (inc == 1) { + DBUG_ASSERT(off == 1); // Optimization for the standard case where we are always simply // incrementing from the last position // Use CAS operation in a loop to make sure automically get the next auto - // increment value while ensuring tha we don't wrap around to a negative + // increment value while ensuring that we don't wrap around to a negative // number. + // + // We set auto_incr to the min of max_val and new_val + 1. This means that + // if we're at the maximum, we should be returning the same value for + // multiple rows, resulting in duplicate key errors (as expected). + // + // If we return values greater than the max, the SQL layer will "truncate" + // the value anyway, but it means that we store invalid values into + // auto_incr that will be visible in SHOW CREATE TABLE. 
new_val = auto_incr; - while (new_val != std::numeric_limits::max()) { - if (auto_incr.compare_exchange_weak(new_val, new_val + 1)) { + while (new_val != std::numeric_limits::max()) { + if (auto_incr.compare_exchange_weak(new_val, + std::min(new_val + 1, max_val))) { break; } } } else { - // The next value can be more complicated if either `inc` or 'off' is not 1 - longlong last_val = auto_incr; + // The next value can be more complicated if either 'inc' or 'off' is not 1 + ulonglong last_val = auto_incr; // Loop until we can correctly update the atomic value do { - if (((last_val - off) / inc) == - (std::numeric_limits::max() - off) / inc) { + DBUG_ASSERT(last_val > 0); + // Calculate the next value in the auto increment series: offset + // + N * increment where N is 0, 1, 2, ... + // + // For further information please visit: + // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html + // + // The following is confusing so here is an explanation: + // To get the next number in the sequence above you subtract out the + // offset, calculate the next sequence (N * increment) and then add the + // offset back in. + // + // The additions are rearranged to avoid overflow. The following is + // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact + // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: + // + // (a+b)/c + // = (a - a%c + a%c + b - b%c + b%c) / c + // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c + // = a/c + b/c + (a%c + b%c) / c + // + // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the + // following statement. + ulonglong n = + (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; + + // Check if n * inc + off will overflow. This can only happen if we have + // an UNSIGNED BIGINT field. 
+ if (n > (std::numeric_limits::max() - off) / inc) { + DBUG_ASSERT(max_val == std::numeric_limits::max()); // The 'last_val' value is already equal to or larger than the largest // value in the sequence. Continuing would wrap around (technically // the behavior would be undefined). What should we do? @@ -10391,31 +10889,30 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, // may not be in our sequence, but it is guaranteed to be equal // to or larger than any other value already inserted. // - // For now I'm going to take option @2. - new_val = std::numeric_limits::max(); + // For now I'm going to take option 2. + // + // Returning ULLONG_MAX from get_auto_increment will cause the SQL + // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to + // the SE API for get_auto_increment, inserts will fail with + // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but + // inserts will fail with ER_DUP_ENTRY for other types (or no failure + // if the column is in a non-unique SK). + new_val = std::numeric_limits::max(); auto_incr = new_val; // Store the largest value into auto_incr break; } - // Calculate the next value in the auto increment series: - // offset + N * increment - // where N is 0, 1, 2, ... - // - // For further information please visit: - // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html - // - // The following is confusing so here is an explanation: - // To get the next number in the sequence above you subtract out - // the offset, calculate the next sequence (N * increment) and then add - // the offset back in. - new_val = (((last_val - off) + (inc - 1)) / inc) * inc + off; + new_val = n * inc + off; // Attempt to store the new value (plus 1 since m_auto_incr_val contains // the next available value) into the atomic value. 
If the current // value no longer matches what we have in 'last_val' this will fail and // we will repeat the loop (`last_val` will automatically get updated // with the current value). - } while (!auto_incr.compare_exchange_weak(last_val, new_val + 1)); + // + // See above explanation for inc == 1 for why we use std::min. + } while (!auto_incr.compare_exchange_weak(last_val, + std::min(new_val + 1, max_val))); } *first_value = new_val; @@ -10521,7 +11018,8 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( ~(my_core::Alter_inplace_info::DROP_INDEX | my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | my_core::Alter_inplace_info::ADD_INDEX | - my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX | + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) { DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } @@ -10532,6 +11030,13 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } + /* We only support changing auto_increment for table options. 
*/ + if ((ha_alter_info->handler_flags & + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) && + !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + DBUG_RETURN(my_core::HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE); } @@ -10571,111 +11076,121 @@ bool ha_rocksdb::prepare_inplace_alter_table( DBUG_ASSERT(altered_table != nullptr); DBUG_ASSERT(ha_alter_info != nullptr); - const uint old_n_keys = m_tbl_def->m_key_count; + Rdb_tbl_def *new_tdef = nullptr; + std::shared_ptr *old_key_descr = nullptr; + std::shared_ptr *new_key_descr = nullptr; + uint old_n_keys = m_tbl_def->m_key_count; uint new_n_keys = altered_table->s->keys; - - if (has_hidden_pk(altered_table)) { - new_n_keys += 1; - } - - const TABLE *const old_table = table; - std::shared_ptr *const old_key_descr = - m_tbl_def->m_key_descr_arr; - std::shared_ptr *const new_key_descr = - new std::shared_ptr[new_n_keys]; - - Rdb_tbl_def *const new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename()); - new_tdef->m_key_descr_arr = new_key_descr; - new_tdef->m_key_count = new_n_keys; - new_tdef->m_auto_incr_val = - m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); - new_tdef->m_hidden_pk_val = - m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); - - if (ha_alter_info->handler_flags & - (my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | - my_core::Alter_inplace_info::ADD_INDEX | - my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) && - create_key_defs(altered_table, new_tdef, table, m_tbl_def)) { - /* Delete the new key descriptors */ - delete[] new_key_descr; - - /* - Explicitly mark as nullptr so we don't accidentally remove entries - from data dictionary on cleanup (or cause double delete[]). 
- */ - new_tdef->m_key_descr_arr = nullptr; - delete new_tdef; - - my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0)); - DBUG_RETURN(HA_EXIT_FAILURE); - } - std::unordered_set> added_indexes; std::unordered_set dropped_index_ids; + uint n_dropped_keys = 0; + uint n_added_keys = 0; + ulonglong max_auto_incr = 0; - uint i; - uint j; + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::DROP_INDEX | + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { - /* Determine which(if any) key definition(s) need to be dropped */ - for (i = 0; i < ha_alter_info->index_drop_count; i++) { - const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i]; - for (j = 0; j < old_n_keys; j++) { - const KEY *const old_key = - &old_table->key_info[old_key_descr[j]->get_keyno()]; - - if (!compare_keys(old_key, dropped_key)) { - dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id()); - break; - } + if (has_hidden_pk(altered_table)) { + new_n_keys += 1; } - } - /* Determine which(if any) key definitions(s) need to be added */ - int identical_indexes_found = 0; - for (i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY *const added_key = - &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; - for (j = 0; j < new_n_keys; j++) { - const KEY *const new_key = - &altered_table->key_info[new_key_descr[j]->get_keyno()]; - if (!compare_keys(new_key, added_key)) { - /* - Check for cases where an 'identical' index is being dropped and - re-added in a single ALTER statement. Turn this into a no-op as the - index has not changed. + const TABLE *const old_table = table; + old_key_descr = m_tbl_def->m_key_descr_arr; + new_key_descr = new std::shared_ptr[new_n_keys]; - E.G. 
Unique index -> non-unique index requires no change + new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename()); + new_tdef->m_key_descr_arr = new_key_descr; + new_tdef->m_key_count = new_n_keys; + new_tdef->m_auto_incr_val = + m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); + new_tdef->m_hidden_pk_val = + m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); - Note that cases where the index name remains the same but the - key-parts are changed is already handled in create_inplace_key_defs. - In these cases the index needs to be rebuilt. + if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) { + /* Delete the new key descriptors */ + delete[] new_key_descr; + + /* + Explicitly mark as nullptr so we don't accidentally remove entries + from data dictionary on cleanup (or cause double delete[]). */ - if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) { - dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id()); - identical_indexes_found++; - } else { - added_indexes.insert(new_key_descr[j]); - } + new_tdef->m_key_descr_arr = nullptr; + delete new_tdef; - break; + my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + uint i; + uint j; + + /* Determine which(if any) key definition(s) need to be dropped */ + for (i = 0; i < ha_alter_info->index_drop_count; i++) { + const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i]; + for (j = 0; j < old_n_keys; j++) { + const KEY *const old_key = + &old_table->key_info[old_key_descr[j]->get_keyno()]; + + if (!compare_keys(old_key, dropped_key)) { + dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id()); + break; + } } } - } - const uint n_dropped_keys = - ha_alter_info->index_drop_count - identical_indexes_found; - const uint n_added_keys = - ha_alter_info->index_add_count - identical_indexes_found; - DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys); - DBUG_ASSERT(added_indexes.size() == n_added_keys); - DBUG_ASSERT(new_n_keys == 
(old_n_keys - n_dropped_keys + n_added_keys)); + /* Determine which(if any) key definitions(s) need to be added */ + int identical_indexes_found = 0; + for (i = 0; i < ha_alter_info->index_add_count; i++) { + const KEY *const added_key = + &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; + for (j = 0; j < new_n_keys; j++) { + const KEY *const new_key = + &altered_table->key_info[new_key_descr[j]->get_keyno()]; + if (!compare_keys(new_key, added_key)) { + /* + Check for cases where an 'identical' index is being dropped and + re-added in a single ALTER statement. Turn this into a no-op as the + index has not changed. + + E.G. Unique index -> non-unique index requires no change + + Note that cases where the index name remains the same but the + key-parts are changed is already handled in create_inplace_key_defs. + In these cases the index needs to be rebuilt. + */ + if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) { + dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id()); + identical_indexes_found++; + } else { + added_indexes.insert(new_key_descr[j]); + } + + break; + } + } + } + + n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found; + n_added_keys = ha_alter_info->index_add_count - identical_indexes_found; + DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys); + DBUG_ASSERT(added_indexes.size() == n_added_keys); + DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys)); + } + if (ha_alter_info->handler_flags & + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) { + if (!new_tdef) { + new_tdef = m_tbl_def; + } + max_auto_incr = load_auto_incr_value_from_index(); + } ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx( new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys, - added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys); - + added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys, + max_auto_incr); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -10829,9 
+11344,10 @@ int ha_rocksdb::inplace_populate_sk( bool is_unique_index = new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME; - Rdb_index_merge rdb_merge( - thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size, - rdb_merge_tmp_file_removal_delay, index->get_cf()); + Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size, + rdb_merge_combine_read_size, + rdb_merge_tmp_file_removal_delay, + index->get_cf()); if ((res = rdb_merge.init())) { DBUG_RETURN(res); @@ -11135,6 +11651,29 @@ bool ha_rocksdb::commit_inplace_alter_table( rdb_drop_idx_thread.signal(); } + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) { + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + std::unordered_set create_index_ids; + + ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value; + + for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) { + Rdb_inplace_alter_ctx *const ctx = + static_cast(*pctx); + auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr); + dict_manager.put_auto_incr_val( + batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val, + true /* overwrite */); + ctx->m_new_tdef->m_auto_incr_val = auto_incr_val; + } + + if (dict_manager.commit(batch)) { + DBUG_ASSERT(0); + } + } + DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -11162,15 +11701,29 @@ struct rocksdb_status_counters_t { uint64_t block_cache_miss; uint64_t block_cache_hit; uint64_t block_cache_add; + uint64_t block_cache_add_failures; uint64_t block_cache_index_miss; uint64_t block_cache_index_hit; + uint64_t block_cache_index_add; + uint64_t block_cache_index_bytes_insert; + uint64_t block_cache_index_bytes_evict; uint64_t block_cache_filter_miss; uint64_t block_cache_filter_hit; + uint64_t block_cache_filter_add; + uint64_t block_cache_filter_bytes_insert; + uint64_t block_cache_filter_bytes_evict; + uint64_t block_cache_bytes_read; + uint64_t 
block_cache_bytes_write; + uint64_t block_cache_data_bytes_insert; uint64_t block_cache_data_miss; uint64_t block_cache_data_hit; + uint64_t block_cache_data_add; uint64_t bloom_filter_useful; uint64_t memtable_hit; uint64_t memtable_miss; + uint64_t get_hit_l0; + uint64_t get_hit_l1; + uint64_t get_hit_l2_and_up; uint64_t compaction_key_drop_new; uint64_t compaction_key_drop_obsolete; uint64_t compaction_key_drop_user; @@ -11179,11 +11732,17 @@ struct rocksdb_status_counters_t { uint64_t number_keys_updated; uint64_t bytes_written; uint64_t bytes_read; + uint64_t number_db_seek; + uint64_t number_db_seek_found; + uint64_t number_db_next; + uint64_t number_db_next_found; + uint64_t number_db_prev; + uint64_t number_db_prev_found; + uint64_t iter_bytes_read; uint64_t no_file_closes; uint64_t no_file_opens; uint64_t no_file_errors; uint64_t stall_micros; - uint64_t rate_limit_delay_millis; uint64_t num_iterators; uint64_t number_multiget_get; uint64_t number_multiget_keys_read; @@ -11216,15 +11775,29 @@ static rocksdb_status_counters_t rocksdb_status_counters; DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS) DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT) DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD) +DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES) DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS) DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT) +DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD) +DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT) +DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT) DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS) DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT) +DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD) +DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT) +DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT) 
+DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ) +DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE) +DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT) DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS) DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT) +DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD) DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL) DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT) DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS) +DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0) +DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1) +DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP) DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY) DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE) DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER) @@ -11233,11 +11806,17 @@ DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ) DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED) DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN) DEF_SHOW_FUNC(bytes_read, BYTES_READ) +DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK) +DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND) +DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT) +DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND) +DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV) +DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND) +DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ) DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES) DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS) DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS) DEF_SHOW_FUNC(stall_micros, STALL_MICROS) -DEF_SHOW_FUNC(rate_limit_delay_millis, RATE_LIMIT_DELAY_MILLIS) DEF_SHOW_FUNC(num_iterators, NO_ITERATORS) DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS) DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ) @@ -11271,6 +11850,7 @@ static void myrocks_update_status() { export_stats.rows_updated = global_stats.rows[ROWS_UPDATED]; 
export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND]; export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED]; + export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED]; export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED]; export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED]; @@ -11309,6 +11889,8 @@ static SHOW_VAR myrocks_status_variables[] = { SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered, + SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("system_rows_deleted", &export_stats.system_rows_deleted, SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("system_rows_inserted", @@ -11427,15 +12009,29 @@ static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(block_cache_miss), DEF_STATUS_VAR(block_cache_hit), DEF_STATUS_VAR(block_cache_add), + DEF_STATUS_VAR(block_cache_add_failures), DEF_STATUS_VAR(block_cache_index_miss), DEF_STATUS_VAR(block_cache_index_hit), + DEF_STATUS_VAR(block_cache_index_add), + DEF_STATUS_VAR(block_cache_index_bytes_insert), + DEF_STATUS_VAR(block_cache_index_bytes_evict), DEF_STATUS_VAR(block_cache_filter_miss), DEF_STATUS_VAR(block_cache_filter_hit), + DEF_STATUS_VAR(block_cache_filter_add), + DEF_STATUS_VAR(block_cache_filter_bytes_insert), + DEF_STATUS_VAR(block_cache_filter_bytes_evict), + DEF_STATUS_VAR(block_cache_bytes_read), + DEF_STATUS_VAR(block_cache_bytes_write), + DEF_STATUS_VAR(block_cache_data_bytes_insert), DEF_STATUS_VAR(block_cache_data_miss), DEF_STATUS_VAR(block_cache_data_hit), + DEF_STATUS_VAR(block_cache_data_add), DEF_STATUS_VAR(bloom_filter_useful), DEF_STATUS_VAR(memtable_hit), DEF_STATUS_VAR(memtable_miss), + DEF_STATUS_VAR(get_hit_l0), + DEF_STATUS_VAR(get_hit_l1), + DEF_STATUS_VAR(get_hit_l2_and_up), DEF_STATUS_VAR(compaction_key_drop_new), DEF_STATUS_VAR(compaction_key_drop_obsolete), DEF_STATUS_VAR(compaction_key_drop_user), @@ -11444,11 +12040,17 @@ 
static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(number_keys_updated), DEF_STATUS_VAR(bytes_written), DEF_STATUS_VAR(bytes_read), + DEF_STATUS_VAR(number_db_seek), + DEF_STATUS_VAR(number_db_seek_found), + DEF_STATUS_VAR(number_db_next), + DEF_STATUS_VAR(number_db_next_found), + DEF_STATUS_VAR(number_db_prev), + DEF_STATUS_VAR(number_db_prev_found), + DEF_STATUS_VAR(iter_bytes_read), DEF_STATUS_VAR(no_file_closes), DEF_STATUS_VAR(no_file_opens), DEF_STATUS_VAR(no_file_errors), DEF_STATUS_VAR(stall_micros), - DEF_STATUS_VAR(rate_limit_delay_millis), DEF_STATUS_VAR(num_iterators), DEF_STATUS_VAR(number_multiget_get), DEF_STATUS_VAR(number_multiget_keys_read), @@ -11474,12 +12076,14 @@ static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(number_superversion_releases), DEF_STATUS_VAR(number_superversion_cleanups), DEF_STATUS_VAR(number_block_not_compressed), + DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("row_lock_wait_timeouts", + &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("snapshot_conflict_errors", &rocksdb_snapshot_conflict_errors, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs, SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes, - SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete, @@ -11546,8 +12150,8 @@ void Rdb_background_thread::run() { // InnoDB's behavior. For mode never, the wal file isn't even written, // whereas background writes to the wal file, but issues the syncs in a // background thread. 
- if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) { - DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes); + if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) && + !rocksdb_db_options->allow_mmap_writes) { const rocksdb::Status s = rdb->FlushWAL(true); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); @@ -11563,6 +12167,16 @@ void Rdb_background_thread::run() { ddl_manager.persist_stats(); } +bool ha_rocksdb::check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys) { + bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond, use_all_keys); + if (!can_use_bloom) { + setup_iterator_bounds(kd, eq_cond); + } + return can_use_bloom; +} + /** Deciding if it is possible to use bloom filter or not. @@ -11581,9 +12195,9 @@ void Rdb_background_thread::run() { @param use_all_keys True if all key parts are set with equal conditions. This is aware of extended keys. */ -bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, - const rocksdb::Slice &eq_cond, - const bool use_all_keys) { +bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys) { bool can_use = false; if (THDVAR(thd, skip_bloom_filter_on_read)) { @@ -11719,7 +12333,7 @@ void rdb_handle_io_error(const rocksdb::Status status, rdb_log_status_error(status, "failed to write to WAL"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on WAL write error."); - abort_with_stack_traces(); + abort(); break; } case RDB_IO_ERROR_BG_THREAD: { @@ -11730,7 +12344,7 @@ void rdb_handle_io_error(const rocksdb::Status status, rdb_log_status_error(status, "failed on I/O"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on I/O error."); - abort_with_stack_traces(); + abort(); break; } default: @@ -11739,16 +12353,17 @@ void rdb_handle_io_error(const rocksdb::Status status, } } else if (status.IsCorruption()) { rdb_log_status_error(status, 
"data corruption detected!"); + rdb_persist_corruption_marker(); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting because of data corruption."); - abort_with_stack_traces(); + abort(); } else if (!status.ok()) { switch (err_type) { case RDB_IO_ERROR_DICT_COMMIT: { rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on WAL write error."); - abort_with_stack_traces(); + abort(); break; } default: @@ -11901,11 +12516,43 @@ void rocksdb_set_collation_exception_list(THD *const thd, *static_cast(var_ptr) = val; } -void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var - MY_ATTRIBUTE((__unused__)), - void *const var_ptr, const void *const save) { - Rdb_transaction *&tx = get_tx_from_thd(thd); +int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) { + int new_value_type = value->value_type(value); + if (new_value_type == MYSQL_VALUE_TYPE_STRING) { + char buf[16]; + int len = sizeof(buf); + const char *str = value->val_str(value, buf, &len); + if (str && (my_strcasecmp(system_charset_info, "true", str) == 0 || + my_strcasecmp(system_charset_info, "on", str) == 0)) { + *return_value = TRUE; + } else if (str && (my_strcasecmp(system_charset_info, "false", str) == 0 || + my_strcasecmp(system_charset_info, "off", str) == 0)) { + *return_value = FALSE; + } else { + return 1; + } + } else if (new_value_type == MYSQL_VALUE_TYPE_INT) { + long long intbuf; + value->val_int(value, &intbuf); + if (intbuf > 1) + return 1; + *return_value = intbuf > 0 ? 
TRUE : FALSE; + } else { + return 1; + } + return 0; +} + +int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + + Rdb_transaction *&tx = get_tx_from_thd(thd); if (tx != nullptr) { const int rc = tx->finish_bulk_load(); if (rc != 0) { @@ -11913,23 +12560,32 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var sql_print_error("RocksDB: Error %d finalizing last SST file while " "setting bulk loading variable", rc); - abort_with_stack_traces(); + THDVAR(thd, bulk_load) = 0; + return 1; } } - *static_cast(var_ptr) = *static_cast(save); + *static_cast(save) = new_value; + return 0; } -void rocksdb_set_bulk_load_allow_unsorted( - THD *const thd, - struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), - void *const var_ptr, const void *const save) { +int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + if (THDVAR(thd, bulk_load)) { my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET", "Cannot change this setting while bulk load is enabled"); - } else { - *static_cast(var_ptr) = *static_cast(save); + + return 1; } + + *static_cast(save) = new_value; + return 0; } static void rocksdb_set_max_background_jobs(THD *thd, @@ -11960,34 +12616,115 @@ static void rocksdb_set_max_background_jobs(THD *thd, RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } -void rocksdb_set_update_cf_options(THD *const /* unused */, - struct st_mysql_sys_var *const /* unused */, - void *const var_ptr, - const void *const save) { - const char *const val = *static_cast(save); - - if (!val) { - // NO_LINT_DEBUG - sql_print_warning("MyRocks: NULL is not a valid option for updates to " - 
"column family settings."); - return; - } +static void rocksdb_set_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); - DBUG_ASSERT(val != nullptr); + const ulonglong new_val = *static_cast(save); + + if (rocksdb_db_options->bytes_per_sync != new_val) { + rocksdb_db_options->bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_wal_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const ulonglong new_val = *static_cast(save); + + if (rocksdb_db_options->wal_bytes_per_sync != new_val) { + rocksdb_db_options->wal_bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to update max_background_jobs. 
" + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static int +rocksdb_validate_update_cf_options(THD * /* unused */, + struct st_mysql_sys_var * /*unused*/, + void *save, struct st_mysql_value *value) { + + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length; + length = sizeof(buff); + str = value->val_str(value, buff, &length); + *(const char **)save = str; + + if (str == nullptr) { + return HA_EXIT_SUCCESS; + } - // Do the real work of applying the changes. Rdb_cf_options::Name_to_config_t option_map; // Basic sanity checking and parsing the options into a map. If this fails // then there's no point to proceed. - if (!Rdb_cf_options::parse_cf_options(val, &option_map)) { - *reinterpret_cast(var_ptr) = nullptr; + if (!Rdb_cf_options::parse_cf_options(str, &option_map)) { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str); + return HA_EXIT_FAILURE; + } + return HA_EXIT_SUCCESS; +} - // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to parse the updated column family " - "options = '%s'.", val); +static void +rocksdb_set_update_cf_options(THD *const /* unused */, + struct st_mysql_sys_var *const /* unused */, + void *const var_ptr, const void *const save) { + const char *const val = *static_cast(save); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + if (!val) { + *reinterpret_cast(var_ptr) = nullptr; + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); + return; + } + + DBUG_ASSERT(val != nullptr); + + // Reset the pointers regardless of how much success we had with updating + // the CF options. This will results in consistent behavior and avoids + // dealing with cases when only a subset of CF-s was successfully updated. + *reinterpret_cast(var_ptr) = my_strdup(val, MYF(0)); + + // Do the real work of applying the changes. 
+ Rdb_cf_options::Name_to_config_t option_map; + + // This should never fail, because of rocksdb_validate_update_cf_options + if (!Rdb_cf_options::parse_cf_options(val, &option_map)) { RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); return; } @@ -12047,15 +12784,6 @@ void rocksdb_set_update_cf_options(THD *const /* unused */, } } - // Reset the pointers regardless of how much success we had with updating - // the CF options. This will results in consistent behavior and avoids - // dealing with cases when only a subset of CF-s was successfully updated. - if (val) { - *reinterpret_cast(var_ptr) = my_strdup(val, MYF(0)); - } else { - *reinterpret_cast(var_ptr) = nullptr; - } - // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to // free up resources used before. @@ -12121,6 +12849,12 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { DBUG_RETURN((rows / 20.0) + 1); } +std::string rdb_corruption_marker_file_name() { + std::string ret(rocksdb_datadir); + ret.append("/ROCKSDB_CORRUPTED"); + return ret; +} + } // namespace myrocks /* @@ -12152,4 +12886,4 @@ mysql_declare_plugin(rocksdb_se){ myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats, myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl, myrocks::rdb_i_s_index_file_map, myrocks::rdb_i_s_lock_info, - myrocks::rdb_i_s_trx_info mysql_declare_plugin_end; + myrocks::rdb_i_s_trx_info, myrocks::rdb_i_s_deadlock_info mysql_declare_plugin_end; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 3b72173f94c..0b3b473d841 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -34,6 +34,7 @@ /* RocksDB header files */ #include "rocksdb/cache.h" +#include "rocksdb/merge_operator.h" #include "rocksdb/perf_context.h" #include "rocksdb/sst_file_manager.h" #include "rocksdb/statistics.h" @@ -89,6 +90,25 @@ struct Rdb_trx_info { std::vector rdb_get_all_trx_info(); +/* + * class for exporting deadlock transaction information for + * 
information_schema.rocksdb_deadlock + */ +struct Rdb_deadlock_info { + struct Rdb_dl_trx_info { + ulonglong trx_id; + std::string cf_name; + std::string waiting_key; + bool exclusive_lock; + std::string index_name; + std::string table_name; + }; + std::vector path; + ulonglong victim_trx_id; +}; + +std::vector rdb_get_deadlock_info(); + /* This is - the name of the default Column Family (the CF which stores indexes which @@ -236,6 +256,8 @@ const char *const RDB_TTL_COL_QUALIFIER = "ttl_col"; */ #define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong) +#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong) + /* Maximum index prefix length in bytes. */ @@ -353,6 +375,7 @@ enum operation_type : int { ROWS_UPDATED, ROWS_DELETED_BLIND, ROWS_EXPIRED, + ROWS_FILTERED, ROWS_HIDDEN_NO_SNAPSHOT, ROWS_MAX }; @@ -386,6 +409,7 @@ struct st_export_stats { ulonglong rows_updated; ulonglong rows_deleted_blind; ulonglong rows_expired; + ulonglong rows_filtered; ulonglong rows_hidden_no_snapshot; ulonglong system_rows_deleted; @@ -524,6 +548,12 @@ class ha_rocksdb : public my_core::handler { uchar *m_dup_sk_packed_tuple; uchar *m_dup_sk_packed_tuple_old; + /* Buffers used for passing upper/bound eq conditions. */ + uchar *m_eq_cond_lower_bound; + uchar *m_eq_cond_upper_bound; + rocksdb::Slice m_eq_cond_lower_bound_slice; + rocksdb::Slice m_eq_cond_upper_bound_slice; + /* Temporary space for packing VARCHARs (we provide it to pack_record()/pack_index_tuple() calls). 
@@ -553,7 +583,7 @@ class ha_rocksdb : public my_core::handler { This is used only when we get the record with rocksdb's Get() call (The other option is when we get a rocksdb::Slice from an iterator) */ - std::string m_retrieved_record; + rocksdb::PinnableSlice m_retrieved_record; /* Type of locking to apply to rows */ enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows; @@ -586,12 +616,7 @@ class ha_rocksdb : public my_core::handler { bool m_update_scope_is_valid; /* SST information used for bulk loading the primary key */ - std::unique_ptr m_sst_info; - /* External merge sorts for bulk load: key ID -> merge sort instance */ - std::unordered_map m_key_merge; - Rdb_transaction *m_bulk_load_tx; - /* Mutex to protect finalizing bulk load */ - mysql_mutex_t m_bulk_load_mutex; + std::shared_ptr m_sst_info; /* MySQL index number for duplicate key error @@ -606,13 +631,17 @@ class ha_rocksdb : public my_core::handler { int secondary_index_read(const int keyno, uchar *const buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void setup_iterator_for_rnd_scan(); - void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *const slice) - MY_ATTRIBUTE((__nonnull__)) { - setup_scan_iterator(kd, slice, false, 0); - } bool is_ascending(const Rdb_key_def &keydef, enum ha_rkey_function find_flag) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void setup_iterator_bounds(const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond); + bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys); + bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys); void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice, const bool use_all_keys, const uint eq_cond_len) MY_ATTRIBUTE((__nonnull__)); @@ -621,7 +650,8 @@ class ha_rocksdb : public my_core::handler { rocksdb::Status get_for_update(Rdb_transaction *const tx, 
rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value) const; + const rocksdb::Slice &key, + rocksdb::PinnableSlice *value) const; int get_row_by_rowid(uchar *const buf, const char *const rowid, const uint rowid_size, const bool skip_lookup = false, @@ -635,8 +665,13 @@ class ha_rocksdb : public my_core::handler { rowid_size, skip_lookup, skip_ttl_check); } - void update_auto_incr_val(); void load_auto_incr_value(); + ulonglong load_auto_incr_value_from_index(); + void update_auto_incr_val(ulonglong val); + void update_auto_incr_val_from_field(); + rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx, + const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const; longlong update_hidden_pk_val(); int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) @@ -680,6 +715,12 @@ class ha_rocksdb : public my_core::handler { */ std::vector m_decoders_vect; + /* + This tells if any field which is part of the key needs to be unpacked and + decoded. + */ + bool m_key_requested = false; + /* Setup field_decoders based on type of scan and table->read_set */ void setup_read_decoders(); @@ -739,9 +780,12 @@ public: my_core::TABLE_SHARE *const table_arg); ~ha_rocksdb() { int err MY_ATTRIBUTE((__unused__)); - err = finalize_bulk_load(); - DBUG_ASSERT(err == 0); - mysql_mutex_destroy(&m_bulk_load_mutex); + err = finalize_bulk_load(false); + if (err != 0) { + sql_print_error("RocksDB: Error %d finalizing bulk load while closing " + "handler.", + err); + } } /** @brief @@ -766,10 +810,9 @@ public: const char **bas_ext() const override; /* - See if this is the same base table - this should only be true for different - partitions of the same table. 
+ Returns the name of the table's base name */ - bool same_table(const ha_rocksdb &other) const; + const std::string &get_table_basename() const; /** @brief This is a list of flags that indicate what functionality the storage engine @@ -1179,8 +1222,6 @@ private: Rdb_tbl_def *get_table_if_exists(const char *const tablename) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__)); - const char *thd_rocksdb_tmpdir() - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); bool contains_foreign_key(THD *const thd) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -1190,6 +1231,9 @@ private: const std::unordered_set> &indexes) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int finalize_bulk_load(bool print_client_error = true) + MY_ATTRIBUTE((__warn_unused_result__)); + public: int index_init(uint idx, bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1233,7 +1277,7 @@ public: DBUG_ENTER_FUNC(); /* Free blob data */ - m_retrieved_record.clear(); + m_retrieved_record.Reset(); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -1304,8 +1348,6 @@ public: my_core::Alter_inplace_info *const ha_alter_info, bool commit) override; - int finalize_bulk_load() MY_ATTRIBUTE((__warn_unused_result__)); - void set_use_read_free_rpl(const char *const whitelist); void set_skip_unique_check_tables(const char *const whitelist); @@ -1356,18 +1398,22 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { /* Stores number of keys to drop */ const uint m_n_dropped_keys; + /* Stores the largest current auto increment value in the index */ + const ulonglong m_max_auto_incr; + Rdb_inplace_alter_ctx( Rdb_tbl_def *new_tdef, std::shared_ptr *old_key_descr, std::shared_ptr *new_key_descr, uint old_n_keys, uint new_n_keys, std::unordered_set> added_indexes, std::unordered_set dropped_index_ids, uint n_added_keys, - uint n_dropped_keys) + uint n_dropped_keys, ulonglong max_auto_incr) : 
my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), - m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) {} + m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys), + m_max_auto_incr(max_auto_incr) {} ~Rdb_inplace_alter_ctx() {} @@ -1376,4 +1422,8 @@ private: Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &); Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); }; + +// file name indicating RocksDB data corruption +std::string rdb_corruption_marker_file_name(); + } // namespace myrocks diff --git a/storage/rocksdb/myrocks_hotbackup b/storage/rocksdb/myrocks_hotbackup new file mode 100755 index 00000000000..cb10bb902c0 --- /dev/null +++ b/storage/rocksdb/myrocks_hotbackup @@ -0,0 +1,686 @@ +#!/usr/bin/env python + +from __future__ import division +from optparse import OptionParser +import collections +import signal +import os +import stat +import sys +import re +import commands +import subprocess +import logging +import logging.handlers +import time +import datetime +import shutil +import traceback +import tempfile + +import MySQLdb +import MySQLdb.connections +from MySQLdb import OperationalError, ProgrammingError + +logger = None +opts = None +rocksdb_files = ['MANIFEST', 'CURRENT', 'OPTIONS'] +rocksdb_data_suffix = '.sst' +rocksdb_wal_suffix = '.log' +exclude_files = ['master.info', 'relay-log.info', 'worker-relay-log.info', + 'auto.cnf', 'gaplock.log', 'ibdata', 'ib_logfile', '.trash'] +wdt_bin = 'wdt' + +def is_manifest(fname): + for m in rocksdb_files: + if fname.startswith(m): + return True + return False + +class Writer(object): + a = None + def __init__(self): + a = None + +class StreamWriter(Writer): + stream_cmd= '' + + def __init__(self, stream_option): + super(StreamWriter, self).__init__() + if stream_option == 'tar': + 
self.stream_cmd= 'tar chf -' + elif stream_option == 'xbstream': + self.stream_cmd= 'xbstream -c' + else: + raise Exception("Only tar or xbstream is supported as streaming option.") + + def write(self, file_name): + rc= os.system(self.stream_cmd + " " + file_name) + if (rc != 0): + raise Exception("Got error on stream write: " + str(rc) + " " + file_name) + + +class MiscFilesProcessor(): + datadir = None + wildcard = r'.*\.[frm|MYD|MYI|MAD|MAI|MRG|TRG|TRN|ARM|ARZ|CSM|CSV|opt|par]' + regex = None + start_backup_time = None + skip_check_frm_timestamp = None + + def __init__(self, datadir, skip_check_frm_timestamp, start_backup_time): + self.datadir = datadir + self.regex = re.compile(self.wildcard) + self.skip_check_frm_timestamp = skip_check_frm_timestamp + self.start_backup_time = start_backup_time + + def process_db(self, db): + # do nothing + pass + + def process_file(self, path): + # do nothing + pass + + def check_frm_timestamp(self, fname, path): + if not self.skip_check_frm_timestamp and fname.endswith('.frm'): + if os.path.getmtime(path) > self.start_backup_time: + logger.error('FRM file %s was updated after starting backups. ' + 'Schema could have changed and the resulting copy may ' + 'not be valid. Aborting. 
' + '(backup time: %s, file modifled time: %s)', + path, datetime.datetime.fromtimestamp(self.start_backup_time).strftime('%Y-%m-%d %H:%M:%S'), + datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d %H:%M:%S')) + raise Exception("Inconsistent frm file timestamp"); + + def process(self): + os.chdir(self.datadir) + for db in self.get_databases(): + logger.info("Starting MySQL misc file traversal from database %s..", db) + self.process_db(db) + for f in self.get_files(db): + if self.match(f): + rel_path = os.path.join(db, f) + self.check_frm_timestamp(f, rel_path) + self.process_file(rel_path) + logger.info("Traversing misc files from data directory..") + for f in self.get_files(""): + should_skip = False + for e in exclude_files: + if f.startswith(e) or f.endswith(e): + logger.info("Skipping %s", f) + should_skip = True + break + if not should_skip: + self.process_file(f) + + def match(self, filename): + if self.regex.match(filename): + return True + else: + return False + + def get_databases(self): + dbs = [] + dirs = [ d for d in os.listdir(self.datadir) \ + if not os.path.isfile(os.path.join(self.datadir,d))] + for db in dirs: + if not db.startswith('.') and not self._is_socket(db): + dbs.append(db) + return dbs + + def get_files(self, db): + dbdir = self.datadir + "/" + db + return [ f for f in os.listdir(dbdir) \ + if os.path.isfile(os.path.join(dbdir,f))] + + def _is_socket(self, item): + mode = os.stat(os.path.join(self.datadir, item)).st_mode + if stat.S_ISSOCK(mode): + return True + return False + + +class MySQLBackup(MiscFilesProcessor): + writer = None + + def __init__(self, datadir, writer, skip_check_frm_timestamp, start_backup_time): + MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time) + self.writer = writer + + def process_file(self, fname): # overriding base class + self.writer.write(fname) + + +class MiscFilesLinkCreator(MiscFilesProcessor): + snapshot_dir = None + + def __init__(self, 
datadir, snapshot_dir, skip_check_frm_timestamp, start_backup_time): + MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time) + self.snapshot_dir = snapshot_dir + + def process_db(self, db): + snapshot_sub_dir = os.path.join(self.snapshot_dir, db) + os.makedirs(snapshot_sub_dir) + + def process_file(self, path): + dst_path = os.path.join(self.snapshot_dir, path) + os.link(path, dst_path) + + +# RocksDB backup +class RocksDBBackup(): + source_dir = None + writer = None + # sst files sent in this backup round + sent_sst = {} + # target sst files in this backup round + target_sst = {} + # sst files sent in all backup rounds + total_sent_sst= {} + # sum of sst file size sent in this backup round + sent_sst_size = 0 + # sum of target sst file size in this backup round + # if sent_sst_size becomes equal to target_sst_size, + # it means the backup round finished backing up all sst files + target_sst_size = 0 + # sum of all sst file size sent all backup rounds + total_sent_sst_size= 0 + # sum of all target sst file size from all backup rounds + total_target_sst_size = 0 + show_progress_size_interval= 1073741824 # 1GB + wal_files= [] + manifest_files= [] + finished= False + + def __init__(self, source_dir, writer, prev): + self.source_dir = source_dir + self.writer = writer + os.chdir(self.source_dir) + self.init_target_files(prev) + + def init_target_files(self, prev): + sst = {} + self.sent_sst = {} + self.target_sst= {} + self.total_sent_sst = {} + self.sent_sst_size = 0 + self.target_sst_size = 0 + self.total_sent_sst_size= 0 + self.total_target_sst_size= 0 + self.wal_files= [] + self.manifest_files= [] + + for f in os.listdir(self.source_dir): + if f.endswith(rocksdb_data_suffix): + # exactly the same file (same size) was sent in previous backup rounds + if prev is not None and f in prev.total_sent_sst and int(os.stat(f).st_size) == prev.total_sent_sst[f]: + continue + sst[f]= int(os.stat(f).st_size) + self.target_sst_size = 
self.target_sst_size + os.stat(f).st_size + elif is_manifest(f): + self.manifest_files.append(f) + elif f.endswith(rocksdb_wal_suffix): + self.wal_files.append(f) + self.target_sst= collections.OrderedDict(sorted(sst.items())) + + if prev is not None: + self.total_sent_sst = prev.total_sent_sst + self.total_sent_sst_size = prev.total_sent_sst_size + self.total_target_sst_size = self.target_sst_size + prev.total_sent_sst_size + else: + self.total_target_sst_size = self.target_sst_size + + def do_backup_single(self, fname): + self.writer.write(fname) + os.remove(fname) + + def do_backup_sst(self, fname, size): + self.do_backup_single(fname) + self.sent_sst[fname]= size + self.total_sent_sst[fname]= size + self.sent_sst_size = self.sent_sst_size + size + self.total_sent_sst_size = self.total_sent_sst_size + size + + def do_backup_manifest(self): + for f in self.manifest_files: + self.do_backup_single(f) + + def do_backup_wal(self): + for f in self.wal_files: + self.do_backup_single(f) + + # this is the last snapshot round. 
backing up all the rest files + def do_backup_final(self): + logger.info("Backup WAL..") + self.do_backup_wal() + logger.info("Backup Manifest..") + self.do_backup_manifest() + self.do_cleanup() + self.finished= True + + def do_cleanup(self): + shutil.rmtree(self.source_dir) + logger.info("Cleaned up checkpoint from %s", self.source_dir) + + def do_backup_until(self, time_limit): + logger.info("Starting backup from snapshot: target files %d", len(self.target_sst)) + start_time= time.time() + last_progress_time= start_time + progress_size= 0 + for fname, size in self.target_sst.iteritems(): + self.do_backup_sst(fname, size) + progress_size= progress_size + size + elapsed_seconds = time.time() - start_time + progress_seconds = time.time() - last_progress_time + + if self.should_show_progress(size): + self.show_progress(progress_size, progress_seconds) + progress_size=0 + last_progress_time= time.time() + + if elapsed_seconds > time_limit and self.has_sent_all_sst() is False: + logger.info("Snapshot round finished. Elapsed Time: %5.2f. 
Remaining sst files: %d", + elapsed_seconds, len(self.target_sst) - len(self.sent_sst)) + self.do_cleanup() + break; + if self.has_sent_all_sst(): + self.do_backup_final() + + return self + + def should_show_progress(self, size): + if int(self.total_sent_sst_size/self.show_progress_size_interval) > int((self.total_sent_sst_size-size)/self.show_progress_size_interval): + return True + else: + return False + + def show_progress(self, size, seconds): + logger.info("Backup Progress: %5.2f%% Sent %6.2f GB of %6.2f GB data, Transfer Speed: %6.2f MB/s", + self.total_sent_sst_size*100/self.total_target_sst_size, + self.total_sent_sst_size/1024/1024/1024, + self.total_target_sst_size/1024/1024/1024, + size/seconds/1024/1024) + + def print_backup_report(self): + logger.info("Sent %6.2f GB of sst files, %d files in total.", + self.total_sent_sst_size/1024/1024/1024, + len(self.total_sent_sst)) + + def has_sent_all_sst(self): + if self.sent_sst_size == self.target_sst_size: + return True + return False + + +class MySQLUtil: + @staticmethod + def connect(user, password, port, socket=None): + if socket: + dbh = MySQLdb.Connect(user=user, + passwd=password, + unix_socket=socket) + else: + dbh = MySQLdb.Connect(user=user, + passwd=password, + port=port, + host="127.0.0.1") + return dbh + + @staticmethod + def create_checkpoint(dbh, checkpoint_dir): + sql = ("SET GLOBAL rocksdb_create_checkpoint='{0}'" + .format(checkpoint_dir)) + cur= dbh.cursor() + cur.execute(sql) + cur.close() + + @staticmethod + def get_datadir(dbh): + sql = "SELECT @@datadir" + cur = dbh.cursor() + cur.execute(sql) + row = cur.fetchone() + return row[0] + + +class BackupRunner: + datadir = None + start_backup_time = None + + def __init__(self, datadir): + self.datadir = datadir + self.start_backup_time = time.time() + + def start_backup_round(self, backup_round, prev_backup): + def signal_handler(*args): + logger.info("Got signal. 
Exit") + if b is not None: + logger.info("Cleaning up snapshot directory..") + b.do_cleanup() + sys.exit(1) + + b = None + try: + signal.signal(signal.SIGINT, signal_handler) + w = None + if opts.output_stream: + w = StreamWriter(opts.output_stream) + else: + raise Exception("Currently only streaming backup is supported.") + + snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round) + dbh = MySQLUtil.connect(opts.mysql_user, + opts.mysql_password, + opts.mysql_port, + opts.mysql_socket) + if not self.datadir: + self.datadir = MySQLUtil.get_datadir(dbh) + logger.info("Set datadir: %s", self.datadir) + logger.info("Creating checkpoint at %s", snapshot_dir) + MySQLUtil.create_checkpoint(dbh, snapshot_dir) + logger.info("Created checkpoint at %s", snapshot_dir) + b = RocksDBBackup(snapshot_dir, w, prev_backup) + return b.do_backup_until(opts.checkpoint_interval) + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + if b is not None: + logger.info("Cleaning up snapshot directory.") + b.do_cleanup() + sys.exit(1) + + def backup_mysql(self): + try: + w = None + if opts.output_stream: + w = StreamWriter(opts.output_stream) + else: + raise Exception("Currently only streaming backup is supported.") + b = MySQLBackup(self.datadir, w, opts.skip_check_frm_timestamp, + self.start_backup_time) + logger.info("Taking MySQL misc backups..") + b.process() + logger.info("MySQL misc backups done.") + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + sys.exit(1) + + +class WDTBackup: + datadir = None + start_backup_time = None + + def __init__(self, datadir): + self.datadir = datadir + self.start_backup_time = time.time() + + def cleanup(self, snapshot_dir, server_log): + if server_log: + server_log.seek(0) + logger.info("WDT server log:") + logger.info(server_log.read()) + server_log.close() + if snapshot_dir: + logger.info("Cleaning up snapshot dir %s", snapshot_dir) + shutil.rmtree(snapshot_dir) + + def 
backup_with_timeout(self, backup_round): + def signal_handler(*args): + logger.info("Got signal. Exit") + self.cleanup(snapshot_dir, server_log) + sys.exit(1) + + logger.info("Starting backup round %d", backup_round) + snapshot_dir = None + server_log = None + try: + signal.signal(signal.SIGINT, signal_handler) + # create rocksdb snapshot + snapshot_dir = os.path.join(opts.checkpoint_directory, str(backup_round)) + dbh = MySQLUtil.connect(opts.mysql_user, + opts.mysql_password, + opts.mysql_port, + opts.mysql_socket) + logger.info("Creating checkpoint at %s", snapshot_dir) + MySQLUtil.create_checkpoint(dbh, snapshot_dir) + logger.info("Created checkpoint at %s", snapshot_dir) + + # get datadir if not provided + if not self.datadir: + self.datadir = MySQLUtil.get_datadir(dbh) + logger.info("Set datadir: %s", self.datadir) + + # create links for misc files + link_creator = MiscFilesLinkCreator(self.datadir, snapshot_dir, + opts.skip_check_frm_timestamp, + self.start_backup_time) + link_creator.process() + + current_path = os.path.join(opts.backupdir, "CURRENT") + + # construct receiver cmd, using the data directory as recovery-id. + # we delete the current file because it is not append-only, therefore not + # resumable. 
+ remote_cmd = ( + "ssh {0} rm -f {1}; " + "{2} -directory {3} -enable_download_resumption " + "-recovery_id {4} -start_port 0 -abort_after_seconds {5} {6}" + ).format(opts.destination, + current_path, + wdt_bin, + opts.backupdir, + self.datadir, + opts.checkpoint_interval, + opts.extra_wdt_receiver_options) + logger.info("WDT remote cmd %s", remote_cmd) + server_log = tempfile.TemporaryFile() + remote_process = subprocess.Popen(remote_cmd.split(), + stdout=subprocess.PIPE, + stderr=server_log) + wdt_url = remote_process.stdout.readline().strip() + if not wdt_url: + raise Exception("Unable to get connection url from wdt receiver") + sender_cmd = ( + "{0} -connection_url \'{1}\' -directory {2} -app_name=myrocks " + "-avg_mbytes_per_sec {3} " + "-enable_download_resumption -abort_after_seconds {4} {5}" + ).format(wdt_bin, + wdt_url, + snapshot_dir, + opts.avg_mbytes_per_sec, + opts.checkpoint_interval, + opts.extra_wdt_sender_options) + sender_status = os.system(sender_cmd) >> 8 + remote_status = remote_process.wait() + self.cleanup(snapshot_dir, server_log) + # TODO: handle retryable and non-retyable errors differently + return (sender_status == 0 and remote_status == 0) + + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + self.cleanup(snapshot_dir, server_log) + sys.exit(1) + + +def backup_using_wdt(): + if not opts.destination: + logger.error("Must provide remote destination when using WDT") + sys.exit(1) + + # TODO: detect whether WDT is installed + logger.info("Backing up myrocks to %s using WDT", opts.destination) + wdt_backup = WDTBackup(opts.datadir) + finished = False + backup_round = 1 + while not finished: + start_time = time.time() + finished = wdt_backup.backup_with_timeout(backup_round) + end_time = time.time() + duration_seconds = end_time - start_time + if (not finished) and (duration_seconds < opts.checkpoint_interval): + # round finished before timeout + sleep_duration = (opts.checkpoint_interval - duration_seconds) 
+ logger.info("Sleeping for %f seconds", sleep_duration) + time.sleep(sleep_duration) + + backup_round = backup_round + 1 + logger.info("Finished myrocks backup using WDT") + + +def init_logger(): + global logger + logger = logging.getLogger('myrocks_hotbackup') + logger.setLevel(logging.INFO) + h1= logging.StreamHandler(sys.stderr) + f = logging.Formatter("%(asctime)s.%(msecs)03d %(levelname)s %(message)s", + "%Y-%m-%d %H:%M:%S") + h1.setFormatter(f) + logger.addHandler(h1) + +backup_wdt_usage = ("Backup using WDT: myrocks_hotbackup " + "--user=root --password=pw --stream=wdt " + "--checkpoint_dir= --destination= --backup_dir=" + ". This has to be executed at the src " + "host.") +backup_usage= "Backup: set -o pipefail; myrocks_hotbackup --user=root --password=pw --port=3306 --checkpoint_dir= | ssh -o NoneEnabled=yes remote_server 'tar -xi -C ' . You need to execute backup command on a server where you take backups." +move_back_usage= "Move-Back: myrocks_hotbackup --move_back --datadir= --rocksdb_datadir= --rocksdb_waldir= --backup_dir= . You need to execute move-back command on a server where backup files are sent." + + +def parse_options(): + global opts + parser = OptionParser(usage = "\n\n" + backup_usage + "\n\n" + \ + backup_wdt_usage + "\n\n" + move_back_usage) + parser.add_option('-i', '--interval', type='int', dest='checkpoint_interval', + default=300, + help='Number of seconds to renew checkpoint') + parser.add_option('-c', '--checkpoint_dir', type='string', dest='checkpoint_directory', + default='/data/mysql/backup/snapshot', + help='Local directory name where checkpoints will be created.') + parser.add_option('-d', '--datadir', type='string', dest='datadir', + default=None, + help='backup mode: src MySQL datadir. move_back mode: dest MySQL datadir') + parser.add_option('-s', '--stream', type='string', dest='output_stream', + default='tar', + help='Setting streaming backup options. Currently tar, WDT ' + 'and xbstream are supported. 
Default is tar') + parser.add_option('--destination', type='string', dest='destination', + default='', + help='Remote server name. Only used for WDT mode so far.') + parser.add_option('--avg_mbytes_per_sec', type='int', + dest='avg_mbytes_per_sec', + default=500, + help='Average backup rate in MBytes/sec. WDT only.') + parser.add_option('--extra_wdt_sender_options', type='string', + dest='extra_wdt_sender_options', + default='', + help='Extra options for WDT sender') + parser.add_option('--extra_wdt_receiver_options', type='string', + dest='extra_wdt_receiver_options', + default='', + help='Extra options for WDT receiver') + parser.add_option('-u', '--user', type='string', dest='mysql_user', + default='root', + help='MySQL user name') + parser.add_option('-p', '--password', type='string', dest='mysql_password', + default='', + help='MySQL password name') + parser.add_option('-P', '--port', type='int', dest='mysql_port', + default=3306, + help='MySQL port number') + parser.add_option('-S', '--socket', type='string', dest='mysql_socket', + default=None, + help='MySQL socket path. Takes precedence over --port.') + parser.add_option('-m', '--move_back', action='store_true', dest='move_back', + default=False, + help='Moving MyRocks backup files to proper locations.') + parser.add_option('-r', '--rocksdb_datadir', type='string', dest='rocksdb_datadir', + default=None, + help='RocksDB target data directory where backup data files will be moved. Must be empty.') + parser.add_option('-w', '--rocksdb_waldir', type='string', dest='rocksdb_waldir', + default=None, + help='RocksDB target data directory where backup wal files will be moved. Must be empty.') + parser.add_option('-b', '--backup_dir', type='string', dest='backupdir', + default=None, + help='backup mode for WDT: Remote directory to store ' + 'backup. 
move_back mode: Locations where backup ' + 'files are stored.') + parser.add_option('-f', '--skip_check_frm_timestamp', + dest='skip_check_frm_timestamp', + action='store_true', default=False, + help='skipping to check if frm files are updated after starting backup.') + parser.add_option('-D', '--debug_signal_file', type='string', dest='debug_signal_file', + default=None, + help='debugging purpose: waiting until the specified file is created') + + opts, args = parser.parse_args() + + +def create_moveback_dir(directory): + if not os.path.exists(directory): + os.makedirs(directory) + else: + for f in os.listdir(directory): + logger.error("Directory %s has file or directory %s!", directory, f) + raise + +def print_move_back_usage(): + logger.warning(move_back_usage) + +def move_back(): + if opts.rocksdb_datadir is None or opts.rocksdb_waldir is None or opts.backupdir is None or opts.datadir is None: + print_move_back_usage() + sys.exit() + create_moveback_dir(opts.datadir) + create_moveback_dir(opts.rocksdb_datadir) + create_moveback_dir(opts.rocksdb_waldir) + + os.chdir(opts.backupdir) + for f in os.listdir(opts.backupdir): + if os.path.isfile(os.path.join(opts.backupdir,f)): + if f.endswith(rocksdb_wal_suffix): + shutil.move(f, opts.rocksdb_waldir) + elif f.endswith(rocksdb_data_suffix) or is_manifest(f): + shutil.move(f, opts.rocksdb_datadir) + else: + shutil.move(f, opts.datadir) + else: #directory + if f.endswith('.rocksdb'): + continue + shutil.move(f, opts.datadir) + +def start_backup(): + logger.info("Starting backup.") + runner = BackupRunner(opts.datadir) + b = None + backup_round= 1 + while True: + b = runner.start_backup_round(backup_round, b) + backup_round = backup_round + 1 + if b.finished is True: + b.print_backup_report() + logger.info("RocksDB Backup Done.") + break + if opts.debug_signal_file: + while not os.path.exists(opts.debug_signal_file): + logger.info("Waiting until %s is created..", opts.debug_signal_file) + time.sleep(1) + 
runner.backup_mysql() + logger.info("All Backups Done.") + + +def main(): + parse_options() + init_logger() + + if opts.move_back is True: + move_back() + elif opts.output_stream == 'wdt': + backup_using_wdt() + else: + start_backup() + +if __name__ == "__main__": + main() diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc new file mode 100644 index 00000000000..ba2e7ace0c5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc @@ -0,0 +1,150 @@ +--echo # +--echo # Testing concurrent transactions. +--echo # + +--source include/count_sessions.inc +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +connect (con3,localhost,root,,); + +connection con1; +begin; +insert into t values (); # 1 + +connection con2; +begin; +insert into t values (); # 2 + +connection con3; +begin; +insert into t values (); # 3 + +connection con1; +insert into t values (); # 4 + +connection con2; +insert into t values (); # 5 + +connection con3; +insert into t values (); # 6 + +connection con2; +commit; + +connection con3; +rollback; + +connection con1; +commit; + +delete from t; + +--echo # Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Slave value before restart +sync_slave_with_master; +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +connection slave; +--source include/stop_slave.inc +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc + +connection default; +--echo # Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--let $rpl_server_number = 2 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc +--echo # Slave value after restart +select table_schema, 
table_name, auto_increment from information_schema.tables where table_name = 't'; + +disconnect con1; +disconnect con2; +disconnect con3; +--source include/wait_until_count_sessions.inc + +--echo # +--echo # Testing interaction of merge markers with various DDL statements. +--echo # +connection slave; +--source include/stop_slave.inc + +connection default; + +--echo # Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Remove auto_increment property. +alter table t modify i int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add column j. +alter table t add column j int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Rename tables. 
+rename table t to t2; +rename table t2 to t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Change auto_increment property +alter table t auto_increment = 1000; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t drop primary key, add key (i), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t add key (j), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Drop table. 
+drop table t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc similarity index 94% rename from storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc rename to storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc index 42cab5ad8c1..239ec74169a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc @@ -1,6 +1,4 @@ ---disable_warnings -DROP TABLE IF EXISTS t1, t2, t3; ---enable_warnings +--source include/count_sessions.inc if ($data_order_desc) { @@ -20,7 +18,7 @@ eval CREATE TABLE t1( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; # Create a second identical table to validate that bulk loading different # tables in the same session works @@ -30,7 +28,7 @@ eval CREATE TABLE t2( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; # Create a third table using partitions to validate that bulk loading works # across a partitioned table @@ -40,7 +38,7 @@ eval CREATE TABLE t3( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; --let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` @@ -154,3 +152,5 @@ EOF # Cleanup disconnect other; DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc new file mode 100644 index 00000000000..84a9d8c578e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -0,0 +1,143 @@ +--source 
include/count_sessions.inc + +SET rocksdb_bulk_load_size=3; +SET rocksdb_bulk_load_allow_unsorted=1; + +### Test individual INSERTs ### + +# A table with only a PK won't have rows until the bulk load is finished +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1 + $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# A table with a PK and a SK shows rows immediately +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b)) + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1 + $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log + +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# Inserting into another table finishes bulk load to the previous table +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; + +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t2 VALUES (1,1); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +INSERT INTO t1 VALUES (2,2); +SELECT * FROM t2 FORCE INDEX (PRIMARY); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1, t2; + +### Test bulk load from a file ### +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a 
INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; + +--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` +# Create a text file with data to import into the table. +# PK and SK are not in any order +--let ROCKSDB_INFILE = $file +perl; +my $fn = $ENV{'ROCKSDB_INFILE'}; +open(my $fh, '>', $fn) || die "perl open($fn): $!"; +my $max = 5000000; +my $sign = 1; +for (my $ii = 0; $ii < $max; $ii++) +{ + my $a = 1 + $sign * $ii; + my $b = 1 - $sign * $ii; + $sign = -$sign; + print $fh "$a\t$b\n"; +} +close($fh); +EOF +--file_exists $file + +# Make sure a snapshot held by another user doesn't block the bulk load +connect (other,localhost,root,,); +set session transaction isolation level repeatable read; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +start transaction with consistent snapshot; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +connection default; +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +--disable_query_log +--echo LOAD DATA INFILE INTO TABLE t1; +eval LOAD DATA INFILE '$file' INTO TABLE t1; +--echo LOAD DATA INFILE INTO TABLE t2; +eval LOAD DATA INFILE '$file' INTO TABLE t2; +--echo LOAD DATA INFILE INTO TABLE t3; +eval LOAD DATA INFILE '$file' INTO TABLE t3; +--enable_query_log +set rocksdb_bulk_load=0; + +--remove_file $file + +# Make sure row count index stats are correct +--replace_column 6 # 7 # 8 # 9 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +ANALYZE TABLE t1, t2, t3; + +--replace_column 6 # 7 # 8 # 9 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +# Make sure all the data is there. 
+select count(a) from t1; +select count(b) from t1; +select count(a) from t2; +select count(b) from t2; +select count(a) from t3; +select count(b) from t3; + +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; + +disconnect other; +DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result index 2d1ba7ca1d8..ec9c940f250 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result @@ -15,6 +15,10 @@ count(b) 3000000 ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Failed to acquire lock due to max_num_locks limit +set session rocksdb_bulk_load=1; +ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=0; SELECT COUNT(*) as c FROM (SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`) UNION DISTINCT diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result index f8508febb01..5d947603ec5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result @@ -778,3 +778,20 @@ set global rocksdb_force_flush_memtable_now = true; select * from t1; col1 col2 extra DROP TABLE t1; +create table t1 (i int auto_increment, key(i)) engine=rocksdb; +insert into t1 values(); +insert into t1 values(); +insert into t1 values(); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) 
NOT NULL AUTO_INCREMENT, + KEY `i` (`i`) +) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL AUTO_INCREMENT, + KEY `i` (`i`) +) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1 +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result new file mode 100644 index 00000000000..1a2abbf3285 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result @@ -0,0 +1,35 @@ +# +# Test how MyRocks behaves when RocksDB reports corrupted data. +# +# +# Test server crashes on corrupted data and restarts +# +create table t1 ( +pk int not null primary key, +col1 varchar(10) +) engine=rocksdb; +insert into t1 values (1,1),(2,2),(3,3); +select * from t1 where pk=1; +pk col1 +1 1 +set session debug= "+d,rocksdb_return_status_corrupted"; +select * from t1 where pk=1; +ERROR HY000: Lost connection to MySQL server during query +# +# The same for scan queries +# +select * from t1; +pk col1 +1 1 +2 2 +3 3 +set session debug= "+d,rocksdb_return_status_corrupted"; +select * from t1; +ERROR HY000: Lost connection to MySQL server during query +# +# Test restart failure. The server is shutdown at this point. +# +# +# Remove corruption file and restart cleanly +# +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result new file mode 100644 index 00000000000..ae29b4e415d --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result @@ -0,0 +1,113 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. 
Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +create table t (i int primary key auto_increment) engine=rocksdb; +# +# Testing concurrent transactions. +# +begin; +insert into t values (); +begin; +insert into t values (); +begin; +insert into t values (); +insert into t values (); +insert into t values (); +insert into t values (); +commit; +rollback; +commit; +delete from t; +# Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 7 +# Slave value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/stop_slave.inc +include/rpl_restart_server.inc [server_number=1] +# Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/rpl_restart_server.inc [server_number=2] +include/start_slave.inc +# Slave value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +# +# Testing interaction of merge markers with various DDL statements. +# +include/stop_slave.inc +# Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +# Remove auto_increment property. 
+alter table t modify i int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t NULL +# Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Add column j. +alter table t add column j int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Rename tables. +rename table t to t2; +rename table t2 to t; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Change auto_increment property +alter table t auto_increment = 1000; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1000 +alter table t auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t drop primary key, add key (i), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t add key (j), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, 
auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 16 +# Drop table. +drop table t; +include/rpl_restart_server.inc [server_number=1] +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result new file mode 100644 index 00000000000..949928f5a9f --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result @@ -0,0 +1,113 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; +# +# Testing concurrent transactions. 
+# +begin; +insert into t values (); +begin; +insert into t values (); +begin; +insert into t values (); +insert into t values (); +insert into t values (); +insert into t values (); +commit; +rollback; +commit; +delete from t; +# Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 7 +# Slave value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/stop_slave.inc +include/rpl_restart_server.inc [server_number=1] +# Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/rpl_restart_server.inc [server_number=2] +include/start_slave.inc +# Slave value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +# +# Testing interaction of merge markers with various DDL statements. +# +include/stop_slave.inc +# Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1 +# Remove auto_increment property. +alter table t modify i int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t NULL +# Add auto_increment property. 
+insert into t values (123); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Add column j. +alter table t add column j int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Rename tables. +rename table t to t2; +rename table t2 to t; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Change auto_increment property +alter table t auto_increment = 1000; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1000 +alter table t auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t drop primary key, add key (i), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t add key (j), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +include/rpl_restart_server.inc 
[server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 16 +# Drop table. +drop table t; +include/rpl_restart_server.inc [server_number=1] +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result new file mode 100644 index 00000000000..79030e35225 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result @@ -0,0 +1,107 @@ +# +# Testing upgrading from server without merges for auto_increment +# to new server with such support. +# +set debug='+d,myrocks_autoinc_upgrade'; +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +2 +3 +delete from t where i > 1; +select * from t; +i +1 +select table_name, index_name, auto_increment +from information_schema.rocksdb_ddl where table_name = 't'; +table_name index_name auto_increment +t PRIMARY NULL +set debug='-d,myrocks_autoinc_upgrade'; +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +2 +3 +4 +select table_name, index_name, auto_increment +from information_schema.rocksdb_ddl where table_name = 't'; +table_name index_name auto_increment +t PRIMARY 5 +delete from t where i > 1; +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +5 +6 +7 +drop table t; +# +# Testing crash safety of transactions. 
+# +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +# Before anything +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_before"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 4 +select max(i) from t; +max(i) +3 +# After engine prepare +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after_prepare"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 4 +select max(i) from t; +max(i) +3 +# After binlog +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after_log"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +select max(i) from t; +max(i) +5 +# After everything +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 8 +select max(i) from t; +max(i) +7 +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result index b14a7a4c0a9..848d362f9e1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result @@ -62,3 +62,82 @@ SELECT a FROM t1 ORDER BY a; a 127 DROP 
TABLE t1; +#--------------------------- +# test large autoincrement values +#--------------------------- +SET auto_increment_increment = 1; +SET auto_increment_offset = 1; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +18446744073709551614 b +DROP TABLE t1; +SET auto_increment_increment = 300; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +ERROR HY000: Failed to read auto-increment value from storage engine +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +DROP TABLE t1; +SET auto_increment_offset 
= 200; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +ERROR HY000: Failed to read auto-increment value from storage engine +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result b/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result deleted file mode 100644 index 28b5b6cd070..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result +++ /dev/null @@ -1 +0,0 @@ -# The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE. 
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result index 6e9be75aaae..99d36e0e7da 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in ascending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result new file mode 100644 index 00000000000..1b1cf524011 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result @@ -0,0 +1,8 @@ +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load_allow_unsorted=1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1); +DROP TABLE t1; +SET rocksdb_bulk_load=0; +SELECT * FROM t1; +ERROR 42S02: Table 'test.t1' doesn't exist diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result index be198d02aaf..6f6bd9a3fec 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result @@ -1,4 +1,4 @@ -CREATE TABLE t1(pk INT, PRIMARY 
KEY(pk)); +CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES(10); INSERT INTO t1 VALUES(11); @@ -15,7 +15,23 @@ INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); SET rocksdb_bulk_load=0; -ERROR HY000: Lost connection to MySQL server during query +ERROR HY000: Rows inserted during bulk load must not overlap existing rows +SHOW VARIABLES LIKE 'rocksdb_bulk_load'; +Variable_name Value +rocksdb_bulk_load OFF +SELECT * FROM t1; +pk +10 +11 +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(1); +INSERT INTO t1 VALUES(2); +INSERT INTO t1 VALUES(20); +INSERT INTO t1 VALUES(21); +SELECT * FROM t1; +pk +10 +11 TRUNCATE TABLE t1; SET rocksdb_bulk_load_allow_unsorted=1; SET rocksdb_bulk_load=1; @@ -43,3 +59,34 @@ pk 202 SET rocksdb_bulk_load_allow_unsorted=DEFAULT; DROP TABLE t1; +CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (),(),(); +ERROR HY000: Rows must be inserted in primary key order during bulk load operation +SET rocksdb_bulk_load=0; +DROP TABLE t1; +SET @orig_table_open_cache=@@global.table_open_cache; +CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(13, 0); +INSERT INTO t1 VALUES(2, 'test 2'); +Warnings: +Warning 1366 Incorrect integer value: 'test 2' for column 'b' at row 1 +INSERT INTO t1 VALUES(@id, @arg04); +SET @@global.table_open_cache=FALSE; +Warnings: +Warning 1292 Truncated incorrect table_open_cache value: '0' +INSERT INTO t1 VALUES(51479+0.333333333,1); +DROP TABLE t1; +SET @@global.table_open_cache=@orig_table_open_cache; +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 VALUES (1), (2); +INSERT INTO t1 VALUES (1); +INSERT INTO t2 VALUES (3); +ERROR HY000: Rows inserted during bulk 
load must not overlap existing rows +SET rocksdb_bulk_load=0; +DROP TABLE t1; +DROP TABLE t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result index a30838b9c9f..684e0efeeca 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in ascending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result index 4d259b5ea2f..1bee69ec8d4 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in descending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE 
t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result index d2d3befdf04..403d72e185a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in descending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 31509227279..0245edf735b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -1,12 +1,12 @@ -DROP TABLE IF EXISTS t1; SET rocksdb_bulk_load_size=3; SET rocksdb_bulk_load_allow_unsorted=1; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); +CREATE TABLE t1(a 
INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b SET rocksdb_bulk_load=0; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b -3 5 -1 3 @@ -14,42 +14,49 @@ a b 4 -2 6 -4 DROP TABLE t1; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b)); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b)) +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b -6 -4 -4 -2 -2 0 --1 3 --3 5 SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +-3 5 +-1 3 +2 0 +4 -2 +6 -4 DROP TABLE t1; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); -CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES (1,1); INSERT INTO t2 VALUES (1,1); -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 INSERT INTO t1 VALUES (2,2); -SELECT * FROM t2; +SELECT * FROM t2 FORCE INDEX (PRIMARY); a b 1 1 -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 SET rocksdb_bulk_load=0; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 2 2 DROP TABLE t1, t2; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); -CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1"); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1") +ENGINE=ROCKSDB; CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") -PARTITION BY KEY() PARTITIONS 4; +ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE @@ -97,5 +104,14 @@ 
count(a) select count(b) from t3; count(b) 5000000 +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +a b +-4999998 5000000 +-4999996 4999998 +-4999994 4999996 +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; +a b +4999999 -4999997 +4999997 -4999995 +4999995 -4999993 DROP TABLE t1, t2, t3; -SET rocksdb_bulk_load_allow_unsorted=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result new file mode 100644 index 00000000000..fcd05fd60b4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result @@ -0,0 +1,117 @@ +SET rocksdb_bulk_load_size=3; +SET rocksdb_bulk_load_allow_unsorted=1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +6 -4 +4 -2 +2 0 +-1 3 +-3 5 +DROP TABLE t1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1", KEY(b)) +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +6 -4 +4 -2 +2 0 +-1 3 +-3 5 +DROP TABLE t1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t2 VALUES (1,1); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +1 1 +INSERT INTO t1 VALUES (2,2); +SELECT * FROM t2 FORCE INDEX (PRIMARY); +a b +1 1 +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +1 1 +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +2 2 +1 1 +DROP TABLE t1, t2; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t3(a INT, b INT, PRIMARY 
KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; +set session transaction isolation level repeatable read; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +STAT_TYPE VALUE +DB_NUM_SNAPSHOTS 0 +start transaction with consistent snapshot; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +STAT_TYPE VALUE +DB_NUM_SNAPSHOTS 1 +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +LOAD DATA INFILE INTO TABLE t1; +LOAD DATA INFILE INTO TABLE t2; +LOAD DATA INFILE INTO TABLE t3; +set rocksdb_bulk_load=0; +SHOW TABLE STATUS WHERE name LIKE 't%'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +ANALYZE TABLE t1, t2, t3; +Table Op Msg_type Msg_text +test.t1 analyze status OK +test.t2 analyze status OK +test.t3 analyze status OK +SHOW TABLE STATUS WHERE name LIKE 't%'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +select count(a) from t1; +count(a) +5000000 +select count(b) from t1; +count(b) +5000000 +select count(a) from t2; +count(a) +5000000 +select count(b) from t2; +count(b) +5000000 +select count(a) from t3; +count(a) +5000000 +select count(b) from t3; +count(b) 
+5000000 +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +a b +4999999 -4999997 +4999997 -4999995 +4999995 -4999993 +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; +a b +-4999998 5000000 +-4999996 4999998 +-4999994 4999996 +DROP TABLE t1, t2, t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result index 840ad9a723c..94585af6cfc 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result @@ -1,3 +1,38 @@ +CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb; +insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4), +(5, 4),(6, 4),(7, 4),(8, 4),(9, 4); +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="id"; +cardinality +NULL +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="a"; +cardinality +NULL +ANALYZE TABLE t0; +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +FLOOR(@N/cardinality) +1 +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; +FLOOR(@N/cardinality) +2 +SET GLOBAL rocksdb_force_flush_memtable_now = 1; +ANALYZE TABLE t0; +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +FLOOR(@N/cardinality) +1 +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; +FLOOR(@N/cardinality) +2 +drop table t0; DROP TABLE IF EXISTS t1; create table t1( id bigint not null primary key, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result 
b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result new file mode 100644 index 00000000000..24c1b730325 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result @@ -0,0 +1,6 @@ +select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; +variable_name variable_value +ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON +select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; +variable_name variable_value +ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result index 005aa1e2989..90eccc89cb5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result @@ -1642,17 +1642,27 @@ r1_1 -0.9 r1_1 0.0 r1_1 0.9 r1_1 0.9 -SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1; -MAX(f) 9.999999680285692e37 -MAX(d) 1e81 -MAX(d10_10) 0.9999999999 -MAX(d1_0) 9 -MAX(d53) 100000000000000000000000000000000000000000000000000000 -MAX(d53_10) 10000000000000000000000000000000000000000000.0000000000 -MAX(f0) 9.999999680285692e37 -MAX(f20_3) 99999998430674940.000 -MAX(f23_0) 9.999999680285692e37 -MAX(r1_1) 0.9 +SELECT +CONCAT('', MAX(f)), +CONCAT('', MAX(f0)), +CONCAT('', MAX(r1_1)), +CONCAT('', MAX(f23_0)), +CONCAT('', MAX(f20_3)), +CONCAT('', MAX(d)), +CONCAT('', MAX(d1_0)), +CONCAT('', MAX(d10_10)), +CONCAT('', MAX(d53)), +CONCAT('', MAX(d53_10)) FROM t1; +CONCAT('', MAX(f)) 9.999999680285692e37 +CONCAT('', MAX(d)) 1e81 +CONCAT('', MAX(d10_10)) 0.9999999999 +CONCAT('', MAX(d1_0)) 9 +CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000 +CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000 
+CONCAT('', MAX(f0)) 9.999999680285692e37 +CONCAT('', MAX(f20_3)) 99999998430674940.000 +CONCAT('', MAX(f23_0)) 9.999999680285692e37 +CONCAT('', MAX(r1_1)) 0.9 INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES ( 9999999999999999999999999999999999999999999999999999999999999.9999, 9999999999999999999999999999999999999999999999999999999999999.9999, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result index 051784528b1..40416b4daec 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result @@ -1432,17 +1432,27 @@ r1_1 -0.9 r1_1 0.0 r1_1 0.9 r1_1 0.9 -SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1; -MAX(f) 9.999999680285692e37 -MAX(d) 1e81 -MAX(d10_10) 0.9999999999 -MAX(d1_0) 9 -MAX(d53) 100000000000000000000000000000000000000000000000000000 -MAX(d53_10) 10000000000000000000000000000000000000000000.0000000000 -MAX(f0) 9.999999680285692e37 -MAX(f20_3) 99999998430674940.000 -MAX(f23_0) 9.999999680285692e37 -MAX(r1_1) 0.9 +SELECT +CONCAT('', MAX(f)), +CONCAT('', MAX(f0)), +CONCAT('', MAX(r1_1)), +CONCAT('', MAX(f23_0)), +CONCAT('', MAX(f20_3)), +CONCAT('', MAX(d)), +CONCAT('', MAX(d1_0)), +CONCAT('', MAX(d10_10)), +CONCAT('', MAX(d53)), +CONCAT('', MAX(d53_10)) FROM t1; +CONCAT('', MAX(f)) 9.999999680285692e37 +CONCAT('', MAX(d)) 1e81 +CONCAT('', MAX(d10_10)) 0.9999999999 +CONCAT('', MAX(d1_0)) 9 +CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000 +CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000 +CONCAT('', MAX(f0)) 9.999999680285692e37 +CONCAT('', MAX(f20_3)) 99999998430674940.000 +CONCAT('', MAX(f23_0)) 9.999999680285692e37 +CONCAT('', MAX(r1_1)) 0.9 INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES ( 
9999999999999999999999999999999999999999999999999999999999999.9999, 9999999999999999999999999999999999999999999999999999999999999.9999, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result index 66b6a5fe799..30f56f0ba3f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result @@ -317,17 +317,27 @@ r1_1 0.0 r1_1 0.0 r1_1 0.9 r1_1 0.9 -SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1; -MAX(f) 9.999999680285692e37 -MAX(d) 1e81 -MAX(d10_10) 0.9999999999 -MAX(d1_0) 9 -MAX(d53) 100000000000000000000000000000000000000000000000000000 -MAX(d53_10) 10000000000000000000000000000000000000000000.0000000000 -MAX(f0) 9.999999680285692e37 -MAX(f20_3) 99999998430674940.000 -MAX(f23_0) 9.999999680285692e37 -MAX(r1_1) 0.9 +SELECT +CONCAT('', MAX(f)), +CONCAT('', MAX(f0)), +CONCAT('', MAX(r1_1)), +CONCAT('', MAX(f23_0)), +CONCAT('', MAX(f20_3)), +CONCAT('', MAX(d)), +CONCAT('', MAX(d1_0)), +CONCAT('', MAX(d10_10)), +CONCAT('', MAX(d53)), +CONCAT('', MAX(d53_10)) FROM t1; +CONCAT('', MAX(f)) 9.999999680285692e37 +CONCAT('', MAX(d)) 1e81 +CONCAT('', MAX(d10_10)) 0.9999999999 +CONCAT('', MAX(d1_0)) 9 +CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000 +CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000 +CONCAT('', MAX(f0)) 9.999999680285692e37 +CONCAT('', MAX(f20_3)) 99999998430674940.000 +CONCAT('', MAX(f23_0)) 9.999999680285692e37 +CONCAT('', MAX(r1_1)) 0.9 INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES ( 9999999999999999999999999999999999999999999999999999999999999.9999, 9999999999999999999999999999999999999999999999999999999999999.9999, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result 
b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result index 823ad2f2fc4..45902651211 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result @@ -313,17 +313,27 @@ r1_1 0.0 r1_1 0.0 r1_1 0.9 r1_1 0.9 -SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1; -MAX(f) 9.999999680285692e37 -MAX(d) 1e81 -MAX(d10_10) 0.9999999999 -MAX(d1_0) 9 -MAX(d53) 100000000000000000000000000000000000000000000000000000 -MAX(d53_10) 10000000000000000000000000000000000000000000.0000000000 -MAX(f0) 9.999999680285692e37 -MAX(f20_3) 99999998430674940.000 -MAX(f23_0) 9.999999680285692e37 -MAX(r1_1) 0.9 +SELECT +CONCAT('', MAX(f)), +CONCAT('', MAX(f0)), +CONCAT('', MAX(r1_1)), +CONCAT('', MAX(f23_0)), +CONCAT('', MAX(f20_3)), +CONCAT('', MAX(d)), +CONCAT('', MAX(d1_0)), +CONCAT('', MAX(d10_10)), +CONCAT('', MAX(d53)), +CONCAT('', MAX(d53_10)) FROM t1; +CONCAT('', MAX(f)) 9.999999680285692e37 +CONCAT('', MAX(d)) 1e81 +CONCAT('', MAX(d10_10)) 0.9999999999 +CONCAT('', MAX(d1_0)) 9 +CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000 +CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000 +CONCAT('', MAX(f0)) 9.999999680285692e37 +CONCAT('', MAX(f20_3)) 99999998430674940.000 +CONCAT('', MAX(f23_0)) 9.999999680285692e37 +CONCAT('', MAX(r1_1)) 0.9 INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES ( 9999999999999999999999999999999999999999999999999999999999999.9999, 9999999999999999999999999999999999999999999999999999999999999.9999, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result index d7cb89becb7..1e7509172cb 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result @@ -66,13 +66,6 @@ KEY LOCK 
TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -122,13 +115,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -147,13 +133,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -204,13 +183,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -229,13 +201,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -254,13 +219,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -295,13 +253,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- 
----------------------------------------- @@ -324,8 +275,12 @@ i 3 select * from t where i=2 for update; select * from t where i=3 for update; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; +deadlocks +true rollback; i 3 @@ -410,13 +365,6 @@ KEY LOCK TYPE: SHARED INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -455,13 +403,6 @@ KEY LOCK TYPE: SHARED INDEX NAME: NOT FOUND; IDX_ID TABLE NAME: NOT FOUND; IDX_ID ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: NOT FOUND; IDX_ID -TABLE NAME: NOT FOUND; IDX_ID --------TXN_ID GOT DEADLOCK--------- diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result index c6c484a7dc5..526d6247e60 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result @@ -1,17 +1,22 @@ DROP TABLE IF EXISTS is_ddl_t1; DROP TABLE IF EXISTS is_ddl_t2; +DROP TABLE IF EXISTS is_ddl_t3; CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf') ENGINE = ROCKSDB; CREATE TABLE is_ddl_t2 (x INT, y INT, z INT, PRIMARY KEY (z, y) COMMENT 'zy_cf', KEY (x)) ENGINE = ROCKSDB; -SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; -TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE 
KV_FORMAT_VERSION CF -test is_ddl_t2 NULL PRIMARY 1 13 zy_cf -test is_ddl_t2 NULL x 2 13 default -test is_ddl_t1 NULL PRIMARY 1 13 default -test is_ddl_t1 NULL j 2 13 default -test is_ddl_t1 NULL k 2 13 kl_cf +CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB +COMMENT "ttl_duration=3600;"; +SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; +TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS +test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1 +test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0 +test is_ddl_t2 NULL x 2 13 default 0 0 +test is_ddl_t1 NULL PRIMARY 1 13 default 0 0 +test is_ddl_t1 NULL j 2 13 default 0 0 +test is_ddl_t1 NULL k 2 13 kl_cf 0 0 DROP TABLE is_ddl_t1; DROP TABLE is_ddl_t2; +DROP TABLE is_ddl_t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result new file mode 100644 index 00000000000..1c67387bcdc --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result @@ -0,0 +1,172 @@ +set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout; +set @prior_deadlock_detect = @@rocksdb_deadlock_detect; +set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks; +set global rocksdb_deadlock_detect = on; +set global rocksdb_lock_wait_timeout = 10000; +# Clears deadlock buffer of any prior deadlocks. 
+set global rocksdb_max_latest_deadlocks = 0; +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +show create table information_schema.rocksdb_deadlock; +Table Create Table +ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` ( + `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT '0', + `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT '0', + `CF_NAME` varchar(193) NOT NULL DEFAULT '', + `WAITING_KEY` varchar(513) NOT NULL DEFAULT '', + `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '', + `INDEX_NAME` varchar(193) NOT NULL DEFAULT '', + `TABLE_NAME` varchar(193) NOT NULL DEFAULT '', + `ROLLED_BACK` bigint(8) NOT NULL DEFAULT '0' +) ENGINE=MEMORY DEFAULT CHARSET=utf8 +create table t (i int primary key) engine=rocksdb; +insert into t values (1), (2), (3); +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +Deadlock #1 +begin; +select * from t where i=1 for update; +i +1 +begin; +select * from t where i=2 for update; +i +2 +select * from t where i=2 for update; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +i +2 +rollback; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +Deadlock #2 +begin; +select * from t where i=1 for update; +i +1 +begin; +select * from t where i=2 for update; +i +2 +select * from t where i=2 for update; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +i +2 +rollback; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID 
default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +set global rocksdb_max_latest_deadlocks = 10; +Deadlock #3 +begin; +select * from t where i=1 for update; +i +1 +begin; +select * from t where i=2 for update; +i +2 +select * from t where i=2 for update; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +i +2 +rollback; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +set global rocksdb_max_latest_deadlocks = 1; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +set rocksdb_deadlock_detect_depth = 2; +Deadlock #4 +begin; +select * from t where i=1 for update; +i +1 +begin; +select * from t where i=2 for update; +i +2 +begin; +select * from t where i=3 for update; +i +3 +select * from t where i=2 for update; +select * from t where i=3 for update; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +i +3 +rollback; +i 
+2 +rollback; +set global rocksdb_max_latest_deadlocks = 5; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +Deadlock #5 +begin; +select * from t where i=1 for update; +i +1 +begin; +select * from t where i=2 for update; +i +2 +begin; +select * from t where i=3 lock in share mode; +i +3 +select * from t where i=100 for update; +i +select * from t where i=101 for update; +i +select * from t where i=2 for update; +select * from t where i=3 lock in share mode; +i +3 +select * from t where i=200 for update; +i +select * from t where i=201 for update; +i +select * from t where i=1 lock in share mode; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +i +2 +rollback; +rollback; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1 +set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; +set global rocksdb_deadlock_detect = @prior_deadlock_detect; +drop table t; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE INDEX_NAME TABLE_NAME 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED INDEX_NAME TABLE_NAME 1 +set global rocksdb_max_latest_deadlocks = 0; +# Clears deadlock buffer of any existent deadlocks. 
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result index 22c8592ff28..9af8e8107ca 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result @@ -25,10 +25,10 @@ UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100; DROP TABLE t0, t1; create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb; insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300); +set global rocksdb_force_flush_memtable_now=1; analyze table t1; Table Op Msg_type Msg_text test.t1 analyze status OK -set global rocksdb_force_flush_memtable_now=1; explain select * from t1 where key1 = 1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ref key1 key1 5 const # NULL diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result index 62875e378a4..797f339d8b1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result @@ -6,6 +6,19 @@ t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL INSERT INTO t1 VALUES ('538647864786478647864'); Warnings: Warning 1264 Out of range value for column 'pk' at row 1 +SELECT * FROM t1; +pk +5 +9223372036854775807 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); 
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +9223372036854775807 SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL @@ -19,3 +32,37 @@ SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL DROP TABLE t1; +CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT); +INSERT INTO t1 VALUES (5); +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (1000); +Warnings: +Warning 1264 Out of range value for column 'pk' at row 1 +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 2 15 30 0 0 0 127 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); +ERROR 23000: Duplicate entry '127' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL 
latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); +ERROR 23000: Duplicate entry '127' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result index d0bfb05fd1b..96efca6e2b7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result @@ -8,6 +8,7 @@ ROW_LOCK_WAIT_TIMEOUTS begin; set @@rocksdb_lock_wait_timeout=1; begin; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; insert into t values(0); ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; @@ -16,6 +17,10 @@ ROW_LOCK_WAIT_TIMEOUTS select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; ROW_LOCK_WAIT_TIMEOUTS 1 +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; +waits +true +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; insert into t values(0); ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; @@ -24,4 +29,7 @@ ROW_LOCK_WAIT_TIMEOUTS select ROW_LOCK_WAIT_TIMEOUTS from 
information_schema.table_statistics where table_name="t"; ROW_LOCK_WAIT_TIMEOUTS 2 +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; +waits +true drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result new file mode 100644 index 00000000000..5a1eeb9fa3f --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result @@ -0,0 +1,20 @@ +CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*"); +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; +FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files +1 +SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files; +@@global.open_files_limit - 1 = @@global.rocksdb_max_open_files +1 +SELECT @@global.rocksdb_max_open_files; +@@global.rocksdb_max_open_files +0 +CREATE TABLE t1(a INT) ENGINE=ROCKSDB; +INSERT INTO t1 VALUES(0),(1),(2),(3),(4); +SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1; +DROP TABLE t1; +SELECT @@global.rocksdb_max_open_files; +@@global.rocksdb_max_open_files +-1 +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; +FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files +1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result index 27b1779627b..1fe61fe9fc5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result @@ -36,7 +36,7 @@ explain select b, d from t where d > 4; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan rows_read 
-1509 +1505 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -44,7 +44,7 @@ explain select a, b, c, d from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra @@ -58,13 +58,13 @@ explain select e from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select e from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where rows_read -251 +250 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -72,13 +72,13 @@ explain select a, b, c, d from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -51 +26 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -86,13 +86,13 @@ explain select e from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows 
Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select e from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where rows_read -251 +250 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -100,13 +100,13 @@ explain select a, b, c, d from t where a in (1, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index rows_read -502 +500 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -102 +52 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -114,13 +114,13 @@ explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index rows_read -753 +750 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -153 +78 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -128,13 +128,13 @@ explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98; id select_type table type 
possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index rows_read -204 +200 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -44 +24 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -142,13 +142,13 @@ explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) a id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index rows_read -765 +750 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -165 +90 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -156,13 +156,13 @@ explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using where; Using index rows_read -51 +50 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -11 +6 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set 
optimizer_switch = 'skip_scan=off'; @@ -170,7 +170,7 @@ explain select a+1, b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a+1, b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra @@ -184,7 +184,7 @@ explain select b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra @@ -204,7 +204,7 @@ explain select a, b, c, d from t where a = b and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan rows_read -9 +5 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=on'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result index 2e8610d43bd..6b959a6439e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result @@ -14,8 +14,13 @@ test t1 NULL BLOCK_READ_BYTE # test t1 NULL BLOCK_READ_TIME # test t1 NULL BLOCK_CHECKSUM_TIME # test t1 NULL BLOCK_DECOMPRESS_TIME # +test t1 NULL GET_READ_BYTES # +test t1 NULL MULTIGET_READ_BYTES # +test t1 NULL ITER_READ_BYTES # test t1 NULL INTERNAL_KEY_SKIPPED_COUNT # test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT # +test t1 NULL INTERNAL_RECENT_SKIPPED_COUNT # +test t1 NULL INTERNAL_MERGE_COUNT # test t1 NULL GET_SNAPSHOT_TIME 
# test t1 NULL GET_FROM_MEMTABLE_TIME # test t1 NULL GET_FROM_MEMTABLE_COUNT # @@ -23,9 +28,12 @@ test t1 NULL GET_POST_PROCESS_TIME # test t1 NULL GET_FROM_OUTPUT_FILES_TIME # test t1 NULL SEEK_ON_MEMTABLE_TIME # test t1 NULL SEEK_ON_MEMTABLE_COUNT # +test t1 NULL NEXT_ON_MEMTABLE_COUNT # +test t1 NULL PREV_ON_MEMTABLE_COUNT # test t1 NULL SEEK_CHILD_SEEK_TIME # test t1 NULL SEEK_CHILD_SEEK_COUNT # -test t1 NULL SEEK_IN_HEAP_TIME # +test t1 NULL SEEK_MIN_HEAP_TIME # +test t1 NULL SEEK_MAX_HEAP_TIME # test t1 NULL SEEK_INTERNAL_SEEK_TIME # test t1 NULL FIND_NEXT_USER_ENTRY_TIME # test t1 NULL WRITE_WAL_TIME # @@ -41,6 +49,12 @@ test t1 NULL NEW_TABLE_BLOCK_ITER_NANOS # test t1 NULL NEW_TABLE_ITERATOR_NANOS # test t1 NULL BLOCK_SEEK_NANOS # test t1 NULL FIND_TABLE_NANOS # +test t1 NULL BLOOM_MEMTABLE_HIT_COUNT # +test t1 NULL BLOOM_MEMTABLE_MISS_COUNT # +test t1 NULL BLOOM_SST_HIT_COUNT # +test t1 NULL BLOOM_SST_MISS_COUNT # +test t1 NULL KEY_LOCK_WAIT_TIME # +test t1 NULL KEY_LOCK_WAIT_COUNT # test t1 NULL IO_THREAD_POOL_ID # test t1 NULL IO_BYTES_WRITTEN # test t1 NULL IO_BYTES_READ # @@ -59,8 +73,13 @@ BLOCK_READ_BYTE # BLOCK_READ_TIME # BLOCK_CHECKSUM_TIME # BLOCK_DECOMPRESS_TIME # +GET_READ_BYTES # +MULTIGET_READ_BYTES # +ITER_READ_BYTES # INTERNAL_KEY_SKIPPED_COUNT # INTERNAL_DELETE_SKIPPED_COUNT # +INTERNAL_RECENT_SKIPPED_COUNT # +INTERNAL_MERGE_COUNT # GET_SNAPSHOT_TIME # GET_FROM_MEMTABLE_TIME # GET_FROM_MEMTABLE_COUNT # @@ -68,9 +87,12 @@ GET_POST_PROCESS_TIME # GET_FROM_OUTPUT_FILES_TIME # SEEK_ON_MEMTABLE_TIME # SEEK_ON_MEMTABLE_COUNT # +NEXT_ON_MEMTABLE_COUNT # +PREV_ON_MEMTABLE_COUNT # SEEK_CHILD_SEEK_TIME # SEEK_CHILD_SEEK_COUNT # -SEEK_IN_HEAP_TIME # +SEEK_MIN_HEAP_TIME # +SEEK_MAX_HEAP_TIME # SEEK_INTERNAL_SEEK_TIME # FIND_NEXT_USER_ENTRY_TIME # WRITE_WAL_TIME # @@ -86,6 +108,12 @@ NEW_TABLE_BLOCK_ITER_NANOS # NEW_TABLE_ITERATOR_NANOS # BLOCK_SEEK_NANOS # FIND_TABLE_NANOS # +BLOOM_MEMTABLE_HIT_COUNT # +BLOOM_MEMTABLE_MISS_COUNT # 
+BLOOM_SST_HIT_COUNT # +BLOOM_SST_MISS_COUNT # +KEY_LOCK_WAIT_TIME # +KEY_LOCK_WAIT_COUNT # IO_THREAD_POOL_ID # IO_BYTES_WRITTEN # IO_BYTES_READ # diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index 76de58ce813..7fee90a1c85 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -563,9 +563,6 @@ insert into t30 values ('row3', 'row3-key', 'row3-data'), ('row4', 'row4-key', 'row4-data'), ('row5', 'row5-key', 'row5-data'); -analyze table t30; -Table Op Msg_type Msg_text -test.t30 analyze status OK explain select * from t30 where key1 <='row3-key'; id select_type table type possible_keys key key_len ref rows Extra @@ -855,13 +852,14 @@ ERROR 42S02: Unknown table 'test.t45' # Now it fails if there is data overlap with what # already exists # -show variables like 'rocksdb%'; +show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files'; Variable_name Value rocksdb_access_hint_on_compaction_start 1 rocksdb_advise_random_on_open ON rocksdb_allow_concurrent_memtable_write OFF rocksdb_allow_mmap_reads OFF rocksdb_allow_mmap_writes OFF +rocksdb_allow_to_start_after_corruption OFF rocksdb_blind_delete_primary_key OFF rocksdb_block_cache_size 536870912 rocksdb_block_restart_interval 16 @@ -881,7 +879,6 @@ rocksdb_compaction_sequential_deletes 0 rocksdb_compaction_sequential_deletes_count_sd OFF rocksdb_compaction_sequential_deletes_file_size 0 rocksdb_compaction_sequential_deletes_window 0 -rocksdb_concurrent_prepare ON rocksdb_create_checkpoint rocksdb_create_if_missing ON rocksdb_create_missing_column_families OFF @@ -905,13 +902,13 @@ rocksdb_enable_ttl_read_filtering ON rocksdb_enable_write_thread_adaptive_yield OFF rocksdb_error_if_exists OFF rocksdb_flush_log_at_trx_commit 1 -rocksdb_flush_memtable_on_analyze ON rocksdb_force_compute_memtable_stats ON 
rocksdb_force_compute_memtable_stats_cachetime 0 rocksdb_force_flush_memtable_and_lzero_now OFF rocksdb_force_flush_memtable_now OFF rocksdb_force_index_records_in_range 0 rocksdb_hash_index_allow_collision ON +rocksdb_ignore_unknown_options ON rocksdb_index_type kBinarySearch rocksdb_info_log_level error_level rocksdb_io_write_timeout 0 @@ -928,8 +925,7 @@ rocksdb_max_background_jobs 2 rocksdb_max_latest_deadlocks 5 rocksdb_max_log_file_size 0 rocksdb_max_manifest_file_size 18446744073709551615 -rocksdb_max_open_files -1 -rocksdb_max_row_locks 1073741824 +rocksdb_max_row_locks 1048576 rocksdb_max_subcompactions 1 rocksdb_max_total_wal_size 0 rocksdb_merge_buf_size 67108864 @@ -964,6 +960,7 @@ rocksdb_table_cache_numshardbits 6 rocksdb_table_stats_sampling_pct 10 rocksdb_tmpdir rocksdb_trace_sst_api OFF +rocksdb_two_write_queues ON rocksdb_unsafe_for_binlog OFF rocksdb_update_cf_options rocksdb_use_adaptive_mutex OFF @@ -1447,6 +1444,7 @@ rocksdb_rows_read # rocksdb_rows_updated # rocksdb_rows_deleted_blind # rocksdb_rows_expired # +rocksdb_rows_filtered # rocksdb_system_rows_deleted # rocksdb_system_rows_inserted # rocksdb_system_rows_read # @@ -1457,11 +1455,22 @@ rocksdb_queries_point # rocksdb_queries_range # rocksdb_covered_secondary_key_lookups # rocksdb_block_cache_add # +rocksdb_block_cache_add_failures # +rocksdb_block_cache_bytes_read # +rocksdb_block_cache_bytes_write # +rocksdb_block_cache_data_add # +rocksdb_block_cache_data_bytes_insert # rocksdb_block_cache_data_hit # rocksdb_block_cache_data_miss # +rocksdb_block_cache_filter_add # +rocksdb_block_cache_filter_bytes_evict # +rocksdb_block_cache_filter_bytes_insert # rocksdb_block_cache_filter_hit # rocksdb_block_cache_filter_miss # rocksdb_block_cache_hit # +rocksdb_block_cache_index_add # +rocksdb_block_cache_index_bytes_evict # +rocksdb_block_cache_index_bytes_insert # rocksdb_block_cache_index_hit # rocksdb_block_cache_index_miss # rocksdb_block_cache_miss # @@ -1478,9 +1487,13 @@ 
rocksdb_compaction_key_drop_new # rocksdb_compaction_key_drop_obsolete # rocksdb_compaction_key_drop_user # rocksdb_flush_write_bytes # +rocksdb_get_hit_l0 # +rocksdb_get_hit_l1 # +rocksdb_get_hit_l2_and_up # rocksdb_getupdatessince_calls # rocksdb_git_date # rocksdb_git_hash # +rocksdb_iter_bytes_read # rocksdb_memtable_hit # rocksdb_memtable_miss # rocksdb_no_file_closes # @@ -1488,6 +1501,12 @@ rocksdb_no_file_errors # rocksdb_no_file_opens # rocksdb_num_iterators # rocksdb_number_block_not_compressed # +rocksdb_number_db_next # +rocksdb_number_db_next_found # +rocksdb_number_db_prev # +rocksdb_number_db_prev_found # +rocksdb_number_db_seek # +rocksdb_number_db_seek_found # rocksdb_number_deletes_filtered # rocksdb_number_keys_read # rocksdb_number_keys_updated # @@ -1502,11 +1521,11 @@ rocksdb_number_sst_entry_merge # rocksdb_number_sst_entry_other # rocksdb_number_sst_entry_put # rocksdb_number_sst_entry_singledelete # -rocksdb_number_stat_computes # rocksdb_number_superversion_acquires # rocksdb_number_superversion_cleanups # rocksdb_number_superversion_releases # -rocksdb_rate_limit_delay_millis # +rocksdb_row_lock_deadlocks # +rocksdb_row_lock_wait_timeouts # rocksdb_snapshot_conflict_errors # rocksdb_stall_l0_file_count_limit_slowdowns # rocksdb_stall_locked_l0_file_count_limit_slowdowns # @@ -1534,6 +1553,7 @@ ROCKSDB_ROWS_READ ROCKSDB_ROWS_UPDATED ROCKSDB_ROWS_DELETED_BLIND ROCKSDB_ROWS_EXPIRED +ROCKSDB_ROWS_FILTERED ROCKSDB_SYSTEM_ROWS_DELETED ROCKSDB_SYSTEM_ROWS_INSERTED ROCKSDB_SYSTEM_ROWS_READ @@ -1544,11 +1564,22 @@ ROCKSDB_QUERIES_POINT ROCKSDB_QUERIES_RANGE ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS ROCKSDB_BLOCK_CACHE_ADD +ROCKSDB_BLOCK_CACHE_ADD_FAILURES +ROCKSDB_BLOCK_CACHE_BYTES_READ +ROCKSDB_BLOCK_CACHE_BYTES_WRITE +ROCKSDB_BLOCK_CACHE_DATA_ADD +ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT ROCKSDB_BLOCK_CACHE_DATA_HIT ROCKSDB_BLOCK_CACHE_DATA_MISS +ROCKSDB_BLOCK_CACHE_FILTER_ADD +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT 
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT ROCKSDB_BLOCK_CACHE_FILTER_HIT ROCKSDB_BLOCK_CACHE_FILTER_MISS ROCKSDB_BLOCK_CACHE_HIT +ROCKSDB_BLOCK_CACHE_INDEX_ADD +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT ROCKSDB_BLOCK_CACHE_INDEX_HIT ROCKSDB_BLOCK_CACHE_INDEX_MISS ROCKSDB_BLOCK_CACHE_MISS @@ -1565,9 +1596,13 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE ROCKSDB_COMPACTION_KEY_DROP_USER ROCKSDB_FLUSH_WRITE_BYTES +ROCKSDB_GET_HIT_L0 +ROCKSDB_GET_HIT_L1 +ROCKSDB_GET_HIT_L2_AND_UP ROCKSDB_GETUPDATESSINCE_CALLS ROCKSDB_GIT_DATE ROCKSDB_GIT_HASH +ROCKSDB_ITER_BYTES_READ ROCKSDB_MEMTABLE_HIT ROCKSDB_MEMTABLE_MISS ROCKSDB_NO_FILE_CLOSES @@ -1575,6 +1610,12 @@ ROCKSDB_NO_FILE_ERRORS ROCKSDB_NO_FILE_OPENS ROCKSDB_NUM_ITERATORS ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED +ROCKSDB_NUMBER_DB_NEXT +ROCKSDB_NUMBER_DB_NEXT_FOUND +ROCKSDB_NUMBER_DB_PREV +ROCKSDB_NUMBER_DB_PREV_FOUND +ROCKSDB_NUMBER_DB_SEEK +ROCKSDB_NUMBER_DB_SEEK_FOUND ROCKSDB_NUMBER_DELETES_FILTERED ROCKSDB_NUMBER_KEYS_READ ROCKSDB_NUMBER_KEYS_UPDATED @@ -1589,11 +1630,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE ROCKSDB_NUMBER_SST_ENTRY_OTHER ROCKSDB_NUMBER_SST_ENTRY_PUT ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE -ROCKSDB_NUMBER_STAT_COMPUTES ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS ROCKSDB_NUMBER_SUPERVERSION_RELEASES -ROCKSDB_RATE_LIMIT_DELAY_MILLIS +ROCKSDB_ROW_LOCK_DEADLOCKS +ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS ROCKSDB_SNAPSHOT_CONFLICT_ERRORS ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS @@ -1623,6 +1664,7 @@ ROCKSDB_ROWS_READ ROCKSDB_ROWS_UPDATED ROCKSDB_ROWS_DELETED_BLIND ROCKSDB_ROWS_EXPIRED +ROCKSDB_ROWS_FILTERED ROCKSDB_SYSTEM_ROWS_DELETED ROCKSDB_SYSTEM_ROWS_INSERTED ROCKSDB_SYSTEM_ROWS_READ @@ -1633,11 +1675,22 @@ ROCKSDB_QUERIES_POINT ROCKSDB_QUERIES_RANGE ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS ROCKSDB_BLOCK_CACHE_ADD +ROCKSDB_BLOCK_CACHE_ADD_FAILURES +ROCKSDB_BLOCK_CACHE_BYTES_READ 
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE +ROCKSDB_BLOCK_CACHE_DATA_ADD +ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT ROCKSDB_BLOCK_CACHE_DATA_HIT ROCKSDB_BLOCK_CACHE_DATA_MISS +ROCKSDB_BLOCK_CACHE_FILTER_ADD +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT ROCKSDB_BLOCK_CACHE_FILTER_HIT ROCKSDB_BLOCK_CACHE_FILTER_MISS ROCKSDB_BLOCK_CACHE_HIT +ROCKSDB_BLOCK_CACHE_INDEX_ADD +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT ROCKSDB_BLOCK_CACHE_INDEX_HIT ROCKSDB_BLOCK_CACHE_INDEX_MISS ROCKSDB_BLOCK_CACHE_MISS @@ -1654,9 +1707,13 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE ROCKSDB_COMPACTION_KEY_DROP_USER ROCKSDB_FLUSH_WRITE_BYTES +ROCKSDB_GET_HIT_L0 +ROCKSDB_GET_HIT_L1 +ROCKSDB_GET_HIT_L2_AND_UP ROCKSDB_GETUPDATESSINCE_CALLS ROCKSDB_GIT_DATE ROCKSDB_GIT_HASH +ROCKSDB_ITER_BYTES_READ ROCKSDB_MEMTABLE_HIT ROCKSDB_MEMTABLE_MISS ROCKSDB_NO_FILE_CLOSES @@ -1664,6 +1721,12 @@ ROCKSDB_NO_FILE_ERRORS ROCKSDB_NO_FILE_OPENS ROCKSDB_NUM_ITERATORS ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED +ROCKSDB_NUMBER_DB_NEXT +ROCKSDB_NUMBER_DB_NEXT_FOUND +ROCKSDB_NUMBER_DB_PREV +ROCKSDB_NUMBER_DB_PREV_FOUND +ROCKSDB_NUMBER_DB_SEEK +ROCKSDB_NUMBER_DB_SEEK_FOUND ROCKSDB_NUMBER_DELETES_FILTERED ROCKSDB_NUMBER_KEYS_READ ROCKSDB_NUMBER_KEYS_UPDATED @@ -1678,11 +1741,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE ROCKSDB_NUMBER_SST_ENTRY_OTHER ROCKSDB_NUMBER_SST_ENTRY_PUT ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE -ROCKSDB_NUMBER_STAT_COMPUTES ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS ROCKSDB_NUMBER_SUPERVERSION_RELEASES -ROCKSDB_RATE_LIMIT_DELAY_MILLIS +ROCKSDB_ROW_LOCK_DEADLOCKS +ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS ROCKSDB_SNAPSHOT_CONFLICT_ERRORS ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result new file mode 100644 index 
00000000000..a245fa851de --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result @@ -0,0 +1,11 @@ +# +# Issue #728: Assertion `covers_key(b)' failed in int +# myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&, +# const rocksdb::Slice&) +# +CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT); +INSERT INTO t2(c1)VALUES(0); +SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC; +c1 c2 c3 +0 NULL NULL +DROP TABLE t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result index d7a4f9dd065..10a6a02008e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result @@ -7,5 +7,5 @@ count(*) 10000 explain select c1 from t1 where c1 > 5 limit 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index +1 SIMPLE t1 range i i 9 NULL # Using where; Using index drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result index 1bcd3692b4a..9fc5db98d7d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result @@ -83,12 +83,12 @@ FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT WHERE TABLE_SCHEMA = 'test' GROUP BY TABLE_NAME, PARTITION_NAME; TABLE_SCHEMA TABLE_NAME PARTITION_NAME COUNT(STAT_TYPE) -test t1 NULL 43 -test t2 NULL 43 -test t4 p0 43 -test t4 p1 43 -test t4 p2 43 -test t4 p3 43 +test t1 NULL 57 +test t2 NULL 57 +test t4 p0 57 +test t4 p1 57 +test t4 p2 57 +test t4 p3 57 SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS; CF_NAME OPTION_TYPE VALUE __system__ COMPARATOR # @@ -153,9 +153,15 @@ __system__ TABLE_FACTORY::BLOCK_SIZE # __system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION # __system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL # __system__ 
TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +__system__ TABLE_FACTORY::METADATA_BLOCK_SIZE # +__system__ TABLE_FACTORY::PARTITION_FILTERS # +__system__ TABLE_FACTORY::USE_DELTA_ENCODING # __system__ TABLE_FACTORY::FILTER_POLICY # __system__ TABLE_FACTORY::WHOLE_KEY_FILTERING # +__system__ TABLE_FACTORY::VERIFY_COMPRESSION # +__system__ TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # __system__ TABLE_FACTORY::FORMAT_VERSION # +__system__ TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # cf_t1 COMPARATOR # cf_t1 MERGE_OPERATOR # cf_t1 COMPACTION_FILTER # @@ -218,9 +224,15 @@ cf_t1 TABLE_FACTORY::BLOCK_SIZE # cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION # cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL # cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +cf_t1 TABLE_FACTORY::METADATA_BLOCK_SIZE # +cf_t1 TABLE_FACTORY::PARTITION_FILTERS # +cf_t1 TABLE_FACTORY::USE_DELTA_ENCODING # cf_t1 TABLE_FACTORY::FILTER_POLICY # cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING # +cf_t1 TABLE_FACTORY::VERIFY_COMPRESSION # +cf_t1 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # cf_t1 TABLE_FACTORY::FORMAT_VERSION # +cf_t1 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # default COMPARATOR # default MERGE_OPERATOR # default COMPACTION_FILTER # @@ -283,9 +295,15 @@ default TABLE_FACTORY::BLOCK_SIZE # default TABLE_FACTORY::BLOCK_SIZE_DEVIATION # default TABLE_FACTORY::BLOCK_RESTART_INTERVAL # default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +default TABLE_FACTORY::METADATA_BLOCK_SIZE # +default TABLE_FACTORY::PARTITION_FILTERS # +default TABLE_FACTORY::USE_DELTA_ENCODING # default TABLE_FACTORY::FILTER_POLICY # default TABLE_FACTORY::WHOLE_KEY_FILTERING # +default TABLE_FACTORY::VERIFY_COMPRESSION # +default TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # default TABLE_FACTORY::FORMAT_VERSION # +default TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # rev:cf_t2 COMPARATOR # rev:cf_t2 MERGE_OPERATOR # rev:cf_t2 COMPACTION_FILTER # @@ -348,9 +366,15 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE # rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION # 
rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL # rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +rev:cf_t2 TABLE_FACTORY::METADATA_BLOCK_SIZE # +rev:cf_t2 TABLE_FACTORY::PARTITION_FILTERS # +rev:cf_t2 TABLE_FACTORY::USE_DELTA_ENCODING # rev:cf_t2 TABLE_FACTORY::FILTER_POLICY # rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING # +rev:cf_t2 TABLE_FACTORY::VERIFY_COMPRESSION # +rev:cf_t2 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION # +rev:cf_t2 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # DROP TABLE t1; DROP TABLE t2; DROP TABLE t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result index 36d5eb24e30..7ca7f27bd0f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result @@ -1,4 +1,19 @@ -CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB; +create table t1 (pk int primary key) engine=rocksdb; +show tables; +Tables_in_test +#mysql50#t1#sql-test +t1 set session debug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug= "-d,gen_sql_table_name"; +show tables; +Tables_in_test +#mysql50#t1#sql-test +t2 +show tables; +Tables_in_test +create table t2 (pk int primary key) engine=rocksdb; +show tables; +Tables_in_test +t2 +drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result index 0a91fe3fcbd..42a8d19afb5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result @@ -51,18 +51,30 @@ INSERT INTO t1 values (3); INSERT INTO t1 values (5); INSERT INTO t1 values (7); set global rocksdb_debug_ttl_rec_ts = 0; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; 
SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +4 set global rocksdb_enable_ttl_read_filtering=0; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 1 3 5 7 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 set global rocksdb_enable_ttl_read_filtering=1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +4 DROP TABLE t1; CREATE TABLE t1 ( a int, @@ -187,19 +199,35 @@ a # Switching to connection 2 set global rocksdb_force_flush_memtable_now=1; set global rocksdb_compact_cf='default'; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +1 # Switching to connection 1 +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 1 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 UPDATE t1 set a = a + 1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 2 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 COMMIT; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from 
information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +1 DROP TABLE t1; set global rocksdb_force_flush_memtable_now=1; set global rocksdb_compact_cf='default'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_float.result b/storage/rocksdb/mysql-test/rocksdb/r/type_float.result index 0f78926c89a..36fb59e3cb9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/type_float.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/type_float.result @@ -127,17 +127,27 @@ r1_1 -0.9 r1_1 0.0 r1_1 0.9 r1_1 0.9 -SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1; -MAX(f) 9.999999680285692e37 -MAX(d) 1e81 -MAX(d10_10) 0.9999999999 -MAX(d1_0) 9 -MAX(d53) 100000000000000000000000000000000000000000000000000000 -MAX(d53_10) 10000000000000000000000000000000000000000000.0000000000 -MAX(f0) 9.999999680285692e37 -MAX(f20_3) 99999998430674940.000 -MAX(f23_0) 9.999999680285692e37 -MAX(r1_1) 0.9 +SELECT +CONCAT('', MAX(f)), +CONCAT('', MAX(f0)), +CONCAT('', MAX(r1_1)), +CONCAT('', MAX(f23_0)), +CONCAT('', MAX(f20_3)), +CONCAT('', MAX(d)), +CONCAT('', MAX(d1_0)), +CONCAT('', MAX(d10_10)), +CONCAT('', MAX(d53)), +CONCAT('', MAX(d53_10)) FROM t1; +CONCAT('', MAX(f)) 9.999999680285692e37 +CONCAT('', MAX(d)) 1e81 +CONCAT('', MAX(d10_10)) 0.9999999999 +CONCAT('', MAX(d1_0)) 9 +CONCAT('', MAX(d53)) 100000000000000000000000000000000000000000000000000000 +CONCAT('', MAX(d53_10)) 10000000000000000000000000000000000000000000.0000000000 +CONCAT('', MAX(f0)) 9.999999680285692e37 +CONCAT('', MAX(f20_3)) 99999998430674940.000 +CONCAT('', MAX(f23_0)) 9.999999680285692e37 +CONCAT('', MAX(r1_1)) 0.9 INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES ( 9999999999999999999999999999999999999999999999999999999999999.9999, 9999999999999999999999999999999999999999999999999999999999999.9999, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result 
b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result index 9784b1a1d94..dd01f7f8077 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result @@ -1,4 +1,3 @@ -drop table if exists t1,t2; # # A basic test whether endspace-aware variable length encoding # works when in PK @@ -756,3 +755,16 @@ email_i 1 drop table t; set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct; set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums; +drop table if exists t; +Warnings: +Note 1051 Unknown table 'test.t' +create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb; +insert into t(i,h) values('a','b'); +check table t; +Table Op Msg_type Msg_text +test.t check status OK +alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null; +check table t; +Table Op Msg_type Msg_text +test.t check status OK +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result index c106f0e77f6..a6a198e402b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result @@ -1,2 +1,9 @@ - RocksDB: Can't enable both use_direct_reads and allow_mmap_reads - RocksDB: Can't enable both use_direct_io_for_flush_and_compaction and allow_mmap_writes +Checking direct reads +Checking direct writes +Checking rocksdb_flush_log_at_trx_commit +Validate flush_log settings when direct writes is enabled +set global rocksdb_flush_log_at_trx_commit=0; +set global rocksdb_flush_log_at_trx_commit=1; +ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1' +set global rocksdb_flush_log_at_trx_commit=2; +ERROR 42000: Variable 
'rocksdb_flush_log_at_trx_commit' can't be set to the value of '2' diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result index ee23446eec0..d0a9b034927 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result @@ -3,6 +3,7 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true; create table aaa (id int primary key, i int) engine rocksdb; set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit; SET GLOBAL rocksdb_flush_log_at_trx_commit=1; +insert aaa(id, i) values(0,1); select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; insert aaa(id, i) values(1,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; @@ -16,11 +17,11 @@ insert aaa(id, i) values(3,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; variable_value-@a 3 +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; SET GLOBAL rocksdb_flush_log_at_trx_commit=0; -select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; insert aaa(id, i) values(4,1); -SET GLOBAL rocksdb_flush_log_at_trx_commit=2; select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=2; insert aaa(id, i) values(5,1); truncate table aaa; drop table aaa; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test index 7e600224dcc..b743b5ca39d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test +++ 
b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test @@ -62,7 +62,12 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; # disable duplicate index warning --disable_warnings # now do same index using copy algorithm +# hitting max row locks (1M) +--error ER_RDB_STATUS_GENERAL ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=1; +ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=0; --enable_warnings # checksum testing diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test index 1f3ef49e534..18ccf2e39f6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test @@ -135,3 +135,15 @@ set global rocksdb_force_flush_memtable_now = true; select * from t1; DROP TABLE t1; + +## https://github.com/facebook/mysql-5.6/issues/736 +create table t1 (i int auto_increment, key(i)) engine=rocksdb; +insert into t1 values(); +insert into t1 values(); +insert into t1 values(); + +show create table t1; +--source include/restart_mysqld.inc +show create table t1; + +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test new file mode 100644 index 00000000000..1863c3247b8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test @@ -0,0 +1,75 @@ +--source include/have_rocksdb.inc +--source include/not_valgrind.inc + +--echo # +--echo # Test how MyRocks behaves when RocksDB reports corrupted data. 
+--echo # + +--source include/have_debug.inc + +# use custom error log to assert on error message in search_pattern_in_file.inc +--let LOG=$MYSQLTEST_VARDIR/tmp/allow_to_start_after_corruption_debug.err +--let SEARCH_FILE=$LOG + +# restart server to change error log and ignore corruption on startup +--let $_mysqld_option=--log-error=$LOG --rocksdb_allow_to_start_after_corruption=1 +--source include/restart_mysqld_with_option.inc + +--echo # +--echo # Test server crashes on corrupted data and restarts +--echo # +create table t1 ( + pk int not null primary key, + col1 varchar(10) +) engine=rocksdb; + +insert into t1 values (1,1),(2,2),(3,3); + +select * from t1 where pk=1; +set session debug= "+d,rocksdb_return_status_corrupted"; +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +select * from t1 where pk=1; +--source include/wait_until_disconnected.inc +--let SEARCH_PATTERN=data corruption detected +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # The same for scan queries +--echo # + +--source include/start_mysqld_with_option.inc +select * from t1; +set session debug= "+d,rocksdb_return_status_corrupted"; +--exec echo "wait" > $_expect_file_name +--error 2013 +select * from t1; +--source include/wait_until_disconnected.inc +--let SEARCH_PATTERN=data corruption detected +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # Test restart failure. The server is shutdown at this point. 
+--echo # + +# remove flag to ignore corruption +--let $_mysqld_option=--log-error=$LOG +--error 0 +--exec $MYSQLD_CMD $_mysqld_option +--let SEARCH_PATTERN=The server will exit normally and stop restart attempts +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # Remove corruption file and restart cleanly +--echo # + +--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/.rocksdb/ROCKSDB_CORRUPTED +--source include/start_mysqld_with_option.inc + +drop table t1; + +# Restart mysqld with default options +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf new file mode 100644 index 00000000000..f4257d80fdb --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf @@ -0,0 +1,8 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +binlog_format=row +[mysqld.2] +binlog_format=row +slave_parallel_workers=1 +rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test new file mode 100644 index 00000000000..b4f329dd1e7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test @@ -0,0 +1,9 @@ +--source include/have_rocksdb.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +create table t (i int primary key auto_increment) engine=rocksdb; + +--source suite/rocksdb/include/autoinc_crash_safe.inc + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf new file mode 100644 index 00000000000..f4257d80fdb --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf @@ -0,0 +1,8 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +binlog_format=row +[mysqld.2] +binlog_format=row +slave_parallel_workers=1 
+rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test new file mode 100644 index 00000000000..5b6a761dd94 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test @@ -0,0 +1,10 @@ +--source include/have_rocksdb.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc +--source include/have_partition.inc + +create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; + +--source suite/rocksdb/include/autoinc_crash_safe.inc + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt new file mode 100644 index 00000000000..83ed8522e72 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt @@ -0,0 +1 @@ +--binlog-format=row diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test new file mode 100644 index 00000000000..df67338a0f1 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test @@ -0,0 +1,118 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc +--source include/have_log_bin.inc + +--echo # +--echo # Testing upgrading from server without merges for auto_increment +--echo # to new server with such support. 
+--echo # + +set debug='+d,myrocks_autoinc_upgrade'; +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +delete from t where i > 1; +select * from t; + +select table_name, index_name, auto_increment + from information_schema.rocksdb_ddl where table_name = 't'; + +set debug='-d,myrocks_autoinc_upgrade'; + +--source include/restart_mysqld.inc + +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +select table_name, index_name, auto_increment + from information_schema.rocksdb_ddl where table_name = 't'; + +delete from t where i > 1; + +--source include/restart_mysqld.inc + +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +drop table t; + +--echo # +--echo # Testing crash safety of transactions. +--echo # +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); + +--echo # Before anything +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_before"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After engine prepare +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after_prepare"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect 
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After binlog +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after_log"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After everything +begin; +insert into t values (); +insert into t values (); +set debug="+d,crash_commit_after"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test index c3f3550e303..e8f343bd876 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test @@ -62,4 +62,42 @@ SELECT LAST_INSERT_ID(); SELECT a FROM t1 ORDER BY a; DROP TABLE t1; +--echo #--------------------------- +--echo # test large autoincrement values +--echo #--------------------------- +SET auto_increment_increment = 1; +SET auto_increment_offset = 1; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; 
+--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; + +SET auto_increment_increment = 300; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; + +SET auto_increment_offset = 200; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test b/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test deleted file mode 100644 index 375571f705d..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test +++ /dev/null @@ -1,3 +0,0 @@ ---source include/have_rocksdb.inc - ---echo # The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE. 
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test index bc63fbb549a..1dabc098a1a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test @@ -3,4 +3,4 @@ --let pk_cf=cf1 --let data_order_desc=0 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test new file mode 100644 index 00000000000..18e40fbf4ab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test @@ -0,0 +1,19 @@ +--source include/have_rocksdb.inc + +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; + +SET rocksdb_bulk_load_allow_unsorted=1; +SET rocksdb_bulk_load=1; + +INSERT INTO t1 VALUES (1); + +--connect (con1,localhost,root,,) +DROP TABLE t1; + +--connection default +--disconnect con1 + +# This would have crashed the server prior to the fix +SET rocksdb_bulk_load=0; +--error ER_NO_SUCH_TABLE +SELECT * FROM t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test index b80361f325c..dcaa69c25a0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test @@ -1,7 +1,13 @@ --source include/have_rocksdb.inc +--source include/count_sessions.inc + +--let LOG1=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.1.err +--let $_mysqld_option=--log-error=$LOG1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc ### Bulk load ### -CREATE TABLE t1(pk INT, PRIMARY KEY(pk)); +CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB; # Make sure we get an error with out of order keys during bulk load SET rocksdb_bulk_load=1; @@ -21,21 +27,47 @@ INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 
VALUES(21); -# This last crashes the server (intentionally) because we can't return any -# error information from a SET = ---exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---error 2013 +--error ER_OVERLAPPING_KEYS SET rocksdb_bulk_load=0; ---exec grep "RocksDB: Error 197 finalizing last SST file while setting bulk loading variable" $MYSQLTEST_VARDIR/log/mysqld.1.err | cut -d] -f2 ---exec echo "" >$MYSQLTEST_VARDIR/log/mysqld.1.err +SHOW VARIABLES LIKE 'rocksdb_bulk_load'; -# restart the crashed server ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +SELECT * FROM t1; -# Make sure the error exists in the .err log and then restart the server ---enable_reconnect ---source include/wait_until_connected_again.inc +--let SEARCH_FILE=$LOG1 +--let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while setting bulk loading variable +--source include/search_pattern_in_file.inc + +--let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err +--let $_mysqld_option=--log-error=$LOG2 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc +--remove_file $LOG1 + + +# Make sure we get an error in log when we disconnect and do not assert the server +--connect (con1,localhost,root,,) +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(1); +INSERT INTO t1 VALUES(2); +INSERT INTO t1 VALUES(20); +INSERT INTO t1 VALUES(21); +--connection default +--disconnect con1 + +SELECT * FROM t1; + +--source include/wait_until_count_sessions.inc + +--let SEARCH_FILE=$LOG2 +--let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while disconnecting +--source include/search_pattern_in_file.inc + +--let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err +--let $_mysqld_option=--log-error=$LOG3 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc +--remove_file $LOG2 TRUNCATE TABLE t1; @@ -63,3 +95,46 @@ SELECT * FROM t1; SET 
rocksdb_bulk_load_allow_unsorted=DEFAULT; DROP TABLE t1; + +# This would trigger a debug assertion that is just an error in release builds +CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--error ER_KEYS_OUT_OF_ORDER +INSERT INTO t1 VALUES (),(),(); +SET rocksdb_bulk_load=0; +DROP TABLE t1; + +# Crash when table open cache closes handler with bulk load operation not finalized +SET @orig_table_open_cache=@@global.table_open_cache; +CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(13, 0); +INSERT INTO t1 VALUES(2, 'test 2'); +INSERT INTO t1 VALUES(@id, @arg04); +SET @@global.table_open_cache=FALSE; +INSERT INTO t1 VALUES(51479+0.333333333,1); +DROP TABLE t1; +SET @@global.table_open_cache=@orig_table_open_cache; + +--let SEARCH_FILE=$LOG3 +--let SEARCH_PATTERN=RocksDB: Error 198 finalizing bulk load while closing handler +--source include/search_pattern_in_file.inc + +--source include/restart_mysqld.inc + +--remove_file $LOG3 + +# Switch between tables, but also introduce duplicate key errors +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 VALUES (1), (2); +INSERT INTO t1 VALUES (1); +--error ER_OVERLAPPING_KEYS +INSERT INTO t2 VALUES (3); +SET rocksdb_bulk_load=0; +DROP TABLE t1; +DROP TABLE t2; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test index 5aec6ff5e99..e78de62bd1e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test @@ -3,4 +3,4 @@ --let pk_cf=rev:cf1 --let data_order_desc=0 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git 
a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test index 83006f9e446..0da4ec69cc3 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test @@ -3,4 +3,4 @@ --let pk_cf=rev:cf1 --let data_order_desc=1 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test index df7b6a7c821..3a2918c482a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test @@ -3,4 +3,4 @@ --let pk_cf=cf1 --let data_order_desc=1 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test index 9cd3aaeafac..589e1a13b49 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test @@ -2,135 +2,4 @@ --let pk_cf=cf1 ---disable_warnings -DROP TABLE IF EXISTS t1; ---enable_warnings - -SET rocksdb_bulk_load_size=3; -SET rocksdb_bulk_load_allow_unsorted=1; - -### Test individual INSERTs ### - -# A table with only a PK won't have rows until the bulk load is finished -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -SET rocksdb_bulk_load=1; ---disable_query_log -let $sign = 1; -let $max = 5; -let $i = 1; -while ($i <= $max) { - let $a = 1 + $sign * $i; - let $b = 1 - $sign * $i; - let $sign = -$sign; - let $insert = INSERT INTO t1 VALUES ($a, $b); - eval $insert; - inc $i; -} ---enable_query_log -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -SELECT * FROM t1; -DROP TABLE t1; - -# A table with a PK and a SK shows rows immediately -eval CREATE TABLE t1(a INT, b INT, PRIMARY 
KEY(a) COMMENT "$pk_cf", KEY(b)); -SET rocksdb_bulk_load=1; ---disable_query_log -let $sign = 1; -let $max = 5; -let $i = 1; -while ($i <= $max) { - let $a = 1 + $sign * $i; - let $b = 1 - $sign * $i; - let $sign = -$sign; - let $insert = INSERT INTO t1 VALUES ($a, $b); - eval $insert; - inc $i; -} ---enable_query_log - -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -DROP TABLE t1; - -# Inserting into another table finishes bulk load to the previous table -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); - -SET rocksdb_bulk_load=1; -INSERT INTO t1 VALUES (1,1); -INSERT INTO t2 VALUES (1,1); -SELECT * FROM t1; -INSERT INTO t1 VALUES (2,2); -SELECT * FROM t2; -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -SELECT * FROM t1; -DROP TABLE t1, t2; - -### Test bulk load from a file ### -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf"); -eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") - PARTITION BY KEY() PARTITIONS 4; - ---let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` -# Create a text file with data to import into the table. 
-# PK and SK are not in any order ---let ROCKSDB_INFILE = $file -perl; -my $fn = $ENV{'ROCKSDB_INFILE'}; -open(my $fh, '>', $fn) || die "perl open($fn): $!"; -my $max = 5000000; -my $sign = 1; -for (my $ii = 0; $ii < $max; $ii++) -{ - my $a = 1 + $sign * $ii; - my $b = 1 - $sign * $ii; - print $fh "$a\t$b\n"; -} -close($fh); -EOF ---file_exists $file - -# Make sure a snapshot held by another user doesn't block the bulk load -connect (other,localhost,root,,); -set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; - -connection default; -set rocksdb_bulk_load=1; -set rocksdb_bulk_load_size=100000; ---disable_query_log ---echo LOAD DATA INFILE INTO TABLE t1; -eval LOAD DATA INFILE '$file' INTO TABLE t1; ---echo LOAD DATA INFILE INTO TABLE t2; -eval LOAD DATA INFILE '$file' INTO TABLE t2; ---echo LOAD DATA INFILE INTO TABLE t3; -eval LOAD DATA INFILE '$file' INTO TABLE t3; ---enable_query_log -set rocksdb_bulk_load=0; - ---remove_file $file - -# Make sure row count index stats are correct ---replace_column 6 # 7 # 8 # 9 # -SHOW TABLE STATUS WHERE name LIKE 't%'; - -ANALYZE TABLE t1, t2, t3; - ---replace_column 6 # 7 # 8 # 9 # -SHOW TABLE STATUS WHERE name LIKE 't%'; - -# Make sure all the data is there. 
-select count(a) from t1; -select count(b) from t1; -select count(a) from t2; -select count(b) from t2; -select count(a) from t3; -select count(b) from t3; - -DROP TABLE t1, t2, t3; -SET rocksdb_bulk_load_allow_unsorted=0; +--source ../include/bulk_load_unsorted.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test index 067a91d5d8b..9e043ed9e4a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test @@ -1,5 +1,4 @@ --source include/have_rocksdb.inc ---source include/have_nodebug.inc # Cannot change unsorted input preference during bulk load SET rocksdb_bulk_load=1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test new file mode 100644 index 00000000000..de9a5c26424 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test @@ -0,0 +1,5 @@ +--source include/have_rocksdb.inc + +--let pk_cf=rev:cf1 + +--source ../include/bulk_load_unsorted.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test index df2b0673315..559b131eca8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test @@ -1,5 +1,47 @@ --source include/have_rocksdb.inc +# Test memtable cardinality statistics +CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb; + +# populate the table with 10 records where cardinality of id is N and a is N/2. 
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4), +(5, 4),(6, 4),(7, 4),(8, 4),(9, 4); + +# Assert no cardinality data exists before ANALYZE TABLE is done +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="id"; +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="a"; + +--disable_result_log +ANALYZE TABLE t0; +--enable_result_log + +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; + +# Flush the table and re-run the test as statistics is calculated a bit +# differently for memtable and SST files +SET GLOBAL rocksdb_force_flush_memtable_now = 1; +--disable_result_log +ANALYZE TABLE t0; +--enable_result_log + +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; + +drop table t0; + +# Test big table on SST + --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test new file mode 100644 index 00000000000..25213544bb5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test @@ -0,0 +1,21 @@ +--disable_warnings +let $MYSQLD_DATADIR= `select @@datadir`; +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err; +select variable_name, variable_value from information_schema.global_variables where 
variable_name="rocksdb_ignore_unknown_options"; + +--exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--shutdown_server 10 +--error 1 +--exec $MYSQLD_CMD --rocksdb_ignore_unknown_options=0 --loose-console > $error_log 2>&1 + +let SEARCH_FILE= $error_log; +let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed; +--source include/search_pattern_in_file.inc +--enable_reconnect +--exec echo "restart" > $restart_file +--source include/wait_until_connected_again.inc +--exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}" +select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test index d2abcb3b63b..9677d2dbbaa 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test @@ -21,29 +21,29 @@ let $con3= `SELECT CONNECTION_ID()`; connection default; eval create table t (i int primary key) engine=$engine; insert into t values (1), (2), (3); ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #1; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ 
/WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #2; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 10; echo Deadlock #3; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 1; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; connection con3; @@ -77,8 +77,10 @@ let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx where thread_id = $con2 and waiting_key != ""; --source include/wait_condition.inc +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; --error ER_LOCK_DEADLOCK select * from t where i=1 for update; +select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; rollback; connection con2; @@ -91,7 +93,7 @@ rollback; connection default; set global 
rocksdb_max_latest_deadlocks = 5; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #5; @@ -133,7 +135,7 @@ connection con3; rollback; connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; disconnect con1; @@ -143,11 +145,11 @@ disconnect con3; set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; set global rocksdb_deadlock_detect = @prior_deadlock_detect; drop table t; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 0; --echo # Clears deadlock buffer of any existent deadlocks. 
set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ show engine rocksdb transaction status; --source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test index 9ee23a88bbe..292e4b7c8b5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test @@ -7,6 +7,7 @@ --disable_warnings DROP TABLE IF EXISTS is_ddl_t1; DROP TABLE IF EXISTS is_ddl_t2; +DROP TABLE IF EXISTS is_ddl_t3; --enable_warnings CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, @@ -17,8 +18,12 @@ CREATE TABLE is_ddl_t2 (x INT, y INT, z INT, PRIMARY KEY (z, y) COMMENT 'zy_cf', KEY (x)) ENGINE = ROCKSDB; -SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; +CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB + COMMENT "ttl_duration=3600;"; + +SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; # cleanup DROP TABLE is_ddl_t1; DROP TABLE is_ddl_t2; +DROP TABLE is_ddl_t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test new file mode 100644 index 00000000000..21558899782 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test @@ -0,0 +1,158 @@ +--source include/have_rocksdb.inc + +set @prior_lock_wait_timeout = 
@@rocksdb_lock_wait_timeout; +set @prior_deadlock_detect = @@rocksdb_deadlock_detect; +set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks; +set global rocksdb_deadlock_detect = on; +set global rocksdb_lock_wait_timeout = 10000; +--echo # Clears deadlock buffer of any prior deadlocks. +set global rocksdb_max_latest_deadlocks = 0; +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; + +# needed by simple_deadlock.inc +let $engine = rocksdb; + +--source include/count_sessions.inc +connect (con1,localhost,root,,); +let $con1= `SELECT CONNECTION_ID()`; + +connect (con2,localhost,root,,); +let $con2= `SELECT CONNECTION_ID()`; + +connect (con3,localhost,root,,); +let $con3= `SELECT CONNECTION_ID()`; + +connection default; +show create table information_schema.rocksdb_deadlock; + +create table t (i int primary key) engine=rocksdb; +insert into t values (1), (2), (3); +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #1; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #2; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 10; + +echo Deadlock #3; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 1; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +connection con3; +set rocksdb_deadlock_detect_depth = 2; + +echo Deadlock #4; +connection con1; +begin; +select * from t where i=1 for update; + +connection con2; +begin; +select * from t where i=2 for update; + +connection con3; +begin; +select 
* from t where i=3 for update; + +connection con1; +send select * from t where i=2 for update; + +connection con2; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con1 and waiting_key != ""; +--source include/wait_condition.inc + +send select * from t where i=3 for update; + +connection con3; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con2 and waiting_key != ""; +--source include/wait_condition.inc + +--error ER_LOCK_DEADLOCK +select * from t where i=1 for update; +rollback; + +connection con2; +reap; +rollback; + +connection con1; +reap; +rollback; + +connection default; +set global rocksdb_max_latest_deadlocks = 5; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #5; +connection con1; +begin; +select * from t where i=1 for update; + +connection con2; +begin; +select * from t where i=2 for update; + +connection con3; +begin; +select * from t where i=3 lock in share mode; + +connection con1; +select * from t where i=100 for update; +select * from t where i=101 for update; +send select * from t where i=2 for update; + +connection con2; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con1 and waiting_key != ""; +--source include/wait_condition.inc + +select * from t where i=3 lock in share mode; +select * from t where i=200 for update; +select * from t where i=201 for update; + +--error ER_LOCK_DEADLOCK +select * from t where i=1 lock in share mode; +rollback; + +connection con1; +reap; +rollback; + +connection con3; +rollback; + +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +disconnect con1; +disconnect con2; +disconnect con3; + +set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; +set global rocksdb_deadlock_detect = 
@prior_deadlock_detect; +drop table t; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY 6 INDEX_NAME 7 TABLE_NAME +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 0; +--echo # Clears deadlock buffer of any existent deadlocks. +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test index abf8d71911b..887b4dd6a65 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test @@ -70,14 +70,15 @@ while ($cnt) SELECT COUNT(*) FROM t1; +# flush the table first as statistics is calculated a bit differently for memtable and SST files +SET GLOBAL rocksdb_force_flush_memtable_now = 1; + -- disable_query_log -- disable_result_log ANALYZE TABLE t1; -- enable_result_log -- enable_query_log -SET GLOBAL rocksdb_force_flush_memtable_now = 1; - --replace_column 9 # EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100; UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100; @@ -95,8 +96,8 @@ while ($i <= 1000) { eval $insert; } --enable_query_log -analyze table t1; set global rocksdb_force_flush_memtable_now=1; +analyze table t1; --replace_column 9 # explain select * from t1 where key1 = 1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test index a4d26cf7739..2306558ff41 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test @@ -5,7 +5,8 @@ # t/index_merge_innodb.test # -# Index merge tests +# Index merge tests (the test is 
called 'index_merge_rocksdb2' because +# 'index_merge_rocksdb' has already existed before copying 'index_merge_innodb') # # Last update: # 2006-08-07 ML test refactored (MySQL 5.1) @@ -61,6 +62,7 @@ INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1; -- disable_query_log -- disable_result_log +set global rocksdb_force_flush_memtable_now=1; analyze table t1; -- enable_result_log -- enable_query_log diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt index a0bf5759ec4..3b7d80662db 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt @@ -1,4 +1,5 @@ --rocksdb_write_disable_wal=1 +--rocksdb_flush_log_at_trx_commit=0 --rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=256;level0_stop_writes_trigger=256;max_write_buffer_number=16;compression_per_level=kNoCompression;memtable=vector:1024 --rocksdb_override_cf_options=__system__={memtable=skip_list:16} --rocksdb_compaction_sequential_deletes=0 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test index 4f1927d366c..d75f1e3c2a8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test @@ -8,6 +8,38 @@ SHOW TABLE STATUS LIKE 't1'; INSERT INTO t1 VALUES ('538647864786478647864'); --replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SELECT * FROM t1; +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +DROP TABLE t1; + +CREATE TABLE t1 (pk TINYINT NOT 
NULL PRIMARY KEY AUTO_INCREMENT); + +INSERT INTO t1 VALUES (5); +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +INSERT INTO t1 VALUES (1000); +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SELECT * FROM t1; +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # SHOW TABLE STATUS LIKE 't1'; --error ER_DUP_ENTRY diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test index f1777ea3e93..5288680c3bd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test @@ -16,16 +16,20 @@ set @@rocksdb_lock_wait_timeout=1; begin; --connection con1 +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --error ER_LOCK_WAIT_TIMEOUT insert into t values(0); select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --connection con2 +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --error ER_LOCK_WAIT_TIMEOUT insert into t values(0); select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --disconnect con1 --connection default diff --git 
a/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test new file mode 100644 index 00000000000..c7c5e7b2ef3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test @@ -0,0 +1,53 @@ +--source include/have_rocksdb.inc + +# Basic Sysbench run fails with basic MyROCKS install due to lack of open files + +# test for over limit +CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*"); + +--let $over_rocksdb_max_open_files=`SELECT @@global.open_files_limit + 100` +--let $under_rocksdb_max_open_files=`SELECT @@global.open_files_limit -1` +--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rocksdb.max_open_files.err +--let SEARCH_PATTERN=RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR $over_rocksdb_max_open_files over_rocksdb_max_open_files +--let $_mysqld_option=--log-error=$SEARCH_FILE --rocksdb_max_open_files=$over_rocksdb_max_open_files +--source include/restart_mysqld_with_option.inc +--source include/search_pattern_in_file.inc + +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; + +# test for within limit +--let $_mysqld_option=--rocksdb_max_open_files=$under_rocksdb_max_open_files +--source include/restart_mysqld_with_option.inc + +SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files; + +# test for minimal value +--let $_mysqld_option=--rocksdb_max_open_files=0 +--source include/restart_mysqld_with_option.inc + +SELECT @@global.rocksdb_max_open_files; + +# verify that we can still do work with no descriptor cache +CREATE TABLE t1(a INT) ENGINE=ROCKSDB; +INSERT INTO t1 VALUES(0),(1),(2),(3),(4); +SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1; +DROP TABLE t1; + +# test for unlimited +--let $_mysqld_option=--rocksdb_max_open_files=-1 +--source include/restart_mysqld_with_option.inc + +SELECT 
@@global.rocksdb_max_open_files; + +# test for auto-tune +--let $_mysqld_option=--rocksdb_max_open_files=-2 +--source include/restart_mysqld_with_option.inc + +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; + +# cleanup +--let $_mysqld_option= +--source include/restart_mysqld.inc +--remove_file $SEARCH_FILE diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test index bff44744b1a..fdecd253f53 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test @@ -38,7 +38,7 @@ BEGIN; insert into r1 values (5,5,5,5,5,5,5,5); update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1'; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test rollback; @@ -48,19 +48,19 @@ connection con1; set @save_default_storage_engine=@@global.default_storage_engine; SET GLOBAL default_storage_engine=rocksdb; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test --exec grep "START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT" $MYSQLTEST_VARDIR/mysqld.1/mysqld.log | wc -l # Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect) --echo ==== mysqldump with --innodb-stats-on-metadata ==== ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata 
test # wiping general log so that this test case doesn't fail with --repeat --exec echo "" > $MYSQLTEST_VARDIR/mysqld.1/mysqld.log # testing mysqldump work with statement based binary logging SET GLOBAL binlog_format=statement; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null SET GLOBAL binlog_format=row; drop table r1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test index 3631e703de6..ca9eb5d2ecf 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test @@ -29,7 +29,7 @@ let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add'; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null # verifying block cache was not filled select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add'; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test index f5e7c6ee043..a22478e341c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test @@ -573,7 +573,6 @@ insert into t30 values ('row3', 'row3-key', 'row3-data'), ('row4', 'row4-key', 'row4-data'), ('row5', 'row5-key', 'row5-data'); -analyze table t30; --replace_column 9 # explain @@ -785,7 +784,10 @@ drop table t45; --echo # Now it fails if there is data 
overlap with what --echo # already exists --echo # -show variables like 'rocksdb%'; +# We exclude rocksdb_max_open_files here because its value is dependent on +# the value of the server's open_files_limit and is expected to be different +# across distros and installs +show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files'; create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb; insert into t47 values (1, 'row1'); insert into t47 values (2, 'row2'); @@ -1101,6 +1103,10 @@ set autocommit = 1; update t1 set a = sleep(100) where pk = 1; --connect (con1,localhost,root,,) + +let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id; +--source include/wait_condition.inc + --echo kill query \$con_id; --disable_query_log eval kill query $con_id; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test new file mode 100644 index 00000000000..7cd4e09e946 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test @@ -0,0 +1,14 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +--echo # +--echo # Issue #728: Assertion `covers_key(b)' failed in int +--echo # myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&, +--echo # const rocksdb::Slice&) +--echo # + +CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT); +INSERT INTO t2(c1)VALUES(0); +SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC; +DROP TABLE t2; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test index 6b8d0b90e90..a7ac236451e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test @@ -15,6 +15,7 @@ while ($i<10000) --enable_query_log analyze table t1; select count(*) from t1; +--replace_column 9 # explain select c1 from t1 where c1 > 5 
limit 10; drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test index caffaa25ad9..01060473d9d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test @@ -1,39 +1,36 @@ --source include/have_rocksdb.inc --source include/have_debug.inc -# Write file to make mysql-test-run.pl expect the "crash", but don't restart the -# server until it is told to --let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect -CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB; +create table t1 (pk int primary key) engine=rocksdb; # Create a .frm file without a matching table --exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm -# Restart the server with a .frm file exist but that table is not registered in RocksDB ---exec echo "wait" >$_expect_file_name -shutdown_server 10; ---exec echo "restart" >$_expect_file_name ---sleep 5 ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect +--source include/restart_mysqld.inc + +show tables; # This will append '#sql-test' to the end of new name set session debug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug= "-d,gen_sql_table_name"; +show tables; + # Remove the corresponding .frm files --remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm --remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm # Restart the server with a table registered in RocksDB but does not have a .frm file ---exec echo "wait" >$_expect_file_name -shutdown_server 10; ---exec echo "restart" >$_expect_file_name ---sleep 5 ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect +--source include/restart_mysqld.inc 
+ +show tables; + +# try to recreate a table with the same name +create table t2 (pk int primary key) engine=rocksdb; + +show tables; + +drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test index 5a694b7b222..4e8b081c4d5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test @@ -78,22 +78,28 @@ INSERT INTO t1 values (7); set global rocksdb_debug_ttl_rec_ts = 0; # should return nothing. +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; # disable filtering set global rocksdb_enable_ttl_read_filtering=0; # should return everything +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; -# disable filtering +# enable filtering set global rocksdb_enable_ttl_read_filtering=1; # should return nothing. 
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; DROP TABLE t1; @@ -286,28 +292,37 @@ SELECT * FROM t1; # <= shouldn't be filtered out here --echo # Switching to connection 2 connection con2; -# compaction doesn't do anythign since con1 snapshot is still open +# compaction doesn't do anything since con1 snapshot is still open set global rocksdb_force_flush_memtable_now=1; set global rocksdb_compact_cf='default'; # read filtered out, because on a different connection, on # this connection the records have 'expired' already so they are filtered out # even though they have not yet been removed by compaction + +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --echo # Switching to connection 1 connection con1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; # <= shouldn't be filtered out here +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; UPDATE t1 set a = a + 1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; # <= shouldn't be filtered out here +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; COMMIT; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result # <= filtered out here because time has passed. 
SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; DROP TABLE t1; disconnect con1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc index 2f37e55b8d6..29756bcf29e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc +++ b/storage/rocksdb/mysql-test/rocksdb/t/type_float.inc @@ -57,7 +57,18 @@ INSERT INTO t1 (f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10,pk) VALUES (-9999 --query_vertical SELECT f,f0,r1_1,f23_0,f20_3,d,d1_0,d10_10,d53,d53_10 FROM t1 --sorted_result ---query_vertical SELECT MAX(f), MAX(f0), MAX(r1_1), MAX(f23_0), MAX(f20_3), MAX(d), MAX(d1_0), MAX(d10_10), MAX(d53), MAX(d53_10) FROM t1 +query_vertical +SELECT + CONCAT('', MAX(f)), + CONCAT('', MAX(f0)), + CONCAT('', MAX(r1_1)), + CONCAT('', MAX(f23_0)), + CONCAT('', MAX(f20_3)), + CONCAT('', MAX(d)), + CONCAT('', MAX(d1_0)), + CONCAT('', MAX(d10_10)), + CONCAT('', MAX(d53)), + CONCAT('', MAX(d53_10)) FROM t1; # Invalid values diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test index e45b6836f67..b631615c266 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test @@ -1,9 +1,5 @@ --source include/have_rocksdb.inc ---disable_warnings -drop table if exists t1,t2; ---enable_warnings - # # VARCHAR column types # @@ -73,3 +69,14 @@ select 'email_i' as index_name, count(*) AS count from t force index(email_i); drop table t; set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct; set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums; + +# Issue #784 - Skip trailing space bytes for non-unpackable fields + +drop table if exists t; +create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), 
key(h)) engine=rocksdb; +insert into t(i,h) values('a','b'); +check table t; +alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null; +check table t; +drop table t; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test index f4492b2ab17..94c11df7eef 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test @@ -5,43 +5,51 @@ # caused an assertion in RocksDB. Now it should not be allowed and the # server will not start with that configuration -# Write file to make mysql-test-run.pl expect the "crash", but don't restart -# the server until it is told to ---let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect ---exec echo "wait" >$_expect_file_name -shutdown_server 10; +--let LOG=$MYSQLTEST_VARDIR/tmp/use_direct_reads_writes.err +--let SEARCH_FILE=$LOG -# Clear the log ---exec echo "" >$MYSQLTEST_VARDIR/log/mysqld.1.err +--echo Checking direct reads +--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_invalid_option.inc -# Attempt to restart the server with invalid options ---exec echo "restart:--rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1" >$_expect_file_name ---sleep 0.1 # Wait 100ms - that is how long the sleep is in check_expected_crash_and_restart ---exec echo "restart:" >$_expect_file_name +--let SEARCH_PATTERN=enable both use_direct_reads +--source include/search_pattern_in_file.inc +--remove_file $LOG + + +# Repeat with direct-writes +--echo Checking direct writes +--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source 
include/restart_mysqld_with_invalid_option.inc + +--let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction +--source include/search_pattern_in_file.inc +--remove_file $LOG + + +# Verify invalid direct-writes and --rocksdb_flush_log_at_trx_commit combination at startup fails +--echo Checking rocksdb_flush_log_at_trx_commit +--let $_mysqld_option=--log-error=$LOG --rocksdb_flush_log_at_trx_commit=1 --rocksdb_allow_mmap_writes=1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_invalid_option.inc + +--let SEARCH_PATTERN=rocksdb_flush_log_at_trx_commit needs to be +--source include/search_pattern_in_file.inc +--remove_file $LOG + + +# Verify rocksdb_flush_log_at_trx_commit cannot be changed if direct writes are used +--echo Validate flush_log settings when direct writes is enabled +--let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0 --rocksdb_allow_mmap_writes=1 +--source include/restart_mysqld_with_option.inc + +set global rocksdb_flush_log_at_trx_commit=0; +--error ER_WRONG_VALUE_FOR_VAR +set global rocksdb_flush_log_at_trx_commit=1; +--error ER_WRONG_VALUE_FOR_VAR +set global rocksdb_flush_log_at_trx_commit=2; # Cleanup ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect - -# We should now have an error message ---exec grep "enable both use_direct_reads" $MYSQLTEST_VARDIR/log/mysqld.1.err | cut -d] -f2 - -# Repeat with --rocksdb-use-direct-writes ---let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect ---exec echo "wait" >$_expect_file_name -shutdown_server 10; - ---exec echo "" >$MYSQLTEST_VARDIR/log/mysqld.1.err - ---exec echo "restart:--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1" >$_expect_file_name ---sleep 0.1 ---exec echo "restart:" >$_expect_file_name - ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect - ---exec grep "enable 
both use_direct_io_for_flush_and_compaction" $MYSQLTEST_VARDIR/log/mysqld.1.err | cut -d] -f2 +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test index 16032f8cff7..71672e265c1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test @@ -7,7 +7,8 @@ create table aaa (id int primary key, i int) engine rocksdb; set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit; SET GLOBAL rocksdb_flush_log_at_trx_commit=1; ---exec sleep 5 +insert aaa(id, i) values(0,1); + select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; insert aaa(id, i) values(1,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; @@ -16,18 +17,16 @@ select variable_value-@a from information_schema.global_status where variable_na insert aaa(id, i) values(3,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; -SET GLOBAL rocksdb_flush_log_at_trx_commit=0; ---exec sleep 5 select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=0; insert aaa(id, i) values(4,1); let $status_var=rocksdb_wal_synced; let $status_var_value=`select @a+1`; source include/wait_for_status_var.inc; -SET GLOBAL rocksdb_flush_log_at_trx_commit=2; ---exec sleep 5 select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=2; insert aaa(id, i) values(5,1); let $status_var=rocksdb_wal_synced; diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc index 
8f03c16e2f1..d983bdf8b58 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc @@ -30,6 +30,7 @@ INSERT INTO t1 VALUES(1, 1); connection slave; --let $slave_sql_errno= 1062 --let $not_switch_connection= 0 +--let $slave_timeout= 120 --source include/wait_for_slave_sql_error_and_skip.inc set global reset_seconds_behind_master=0; --source include/stop_slave_io.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result index 3d734c9498d..89e93f6b8f0 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result @@ -10,6 +10,7 @@ insert into r1 values (1, 1000); set global rocksdb_force_flush_memtable_now=1; include/rpl_start_server.inc [server_number=2] include/start_slave.inc +insert into r1 values (2,2000); delete r1 from r1 force index (i) where id2=1000; select id1,id2 from r1 force index (primary) where id1=1 and id2=1000; id1 id2 diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test index 6143824eea6..ff484171213 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test @@ -62,6 +62,7 @@ SET GLOBAL SYNC_BINLOG = 1; insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush"); +--error 0,2013 SET DEBUG_SYNC='now SIGNAL go'; --source include/wait_until_disconnected.inc --enable_reconnect diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test index 9180afa881f..6d953ead4e9 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test @@ -53,8 +53,14 @@ EOF --source include/rpl_start_server.inc --source include/start_slave.inc + +# Due to the binlogs being truncated, the slave may still think it's processed up to +# the truncated binlog and select master_pos_wait() can return prematurely. Add +# a new transaction to the master to force master_pos_wait() to wait. connection master; +insert into r1 values (2,2000); sync_slave_with_master; + connection slave; delete r1 from r1 force index (i) where id2=1000; select id1,id2 from r1 force index (primary) where id1=1 and id2=1000; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result index 3d76e035e05..9f161b18c05 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result +++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result @@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), +auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, +KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; stop slave; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result index 3d76e035e05..9f161b18c05 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result +++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result @@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), +auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, +KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; stop slave; diff --git 
a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py index 20098f49b42..c1d3e7fb81c 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py @@ -95,6 +95,8 @@ LOADERS_READY = 0 REQUEST_ID = 1 REQUEST_ID_LOCK = threading.Lock() +INSERT_ID_SET = set() + def get_next_request_id(): global REQUEST_ID with REQUEST_ID_LOCK: @@ -302,10 +304,19 @@ class PopulateWorker(WorkerThread): execute(self.cur, stmt) if i % 101 == 0: self.con.commit() + check_id(self.con.insert_id()) self.con.commit() + check_id(self.con.insert_id()) logging.info("Inserted %d rows starting at id %d" % (self.num_to_add, self.start_id)) +def check_id(id): + if id == 0: + return + if id in INSERT_ID_SET: + raise Exception("Duplicate auto_inc id %d" % id) + INSERT_ID_SET.add(id) + def populate_table(num_records): logging.info("Populate_table started for %d records" % num_records) @@ -422,6 +433,7 @@ class LoadGenWorker(WorkerThread): execute(self.cur, gen_insert(self.table, idx, self.thread_id, request_id, 0)) self.con.commit() + check_id(self.con.insert_id()) self.id_map.append(request_id) @@ -687,6 +699,7 @@ class LoadGenWorker(WorkerThread): else: self.cur_txn_state = self.TXN_COMMIT_STARTED self.con.commit() + check_id(self.con.insert_id()) if not self.con.get_server_info(): raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR, "Possible connection error on commit") diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test index 7d92bb3f83a..307211a124d 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test @@ -17,6 +17,8 @@ CREATE TABLE t1(id INT PRIMARY KEY, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), + auto_inc BIGINT UNSIGNED NOT 
NULL AUTO_INCREMENT, + KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test index 6f6128579b5..8ef4c73c3b0 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test @@ -18,6 +18,8 @@ CREATE TABLE t1(id INT PRIMARY KEY, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), + auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result new file mode 100644 index 00000000000..086010dc79e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result @@ -0,0 +1,7 @@ +SET @start_global_value = @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION; +SELECT @start_global_value; +@start_global_value +0 +"Trying to set variable @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION = 444; +ERROR HY000: Variable 'rocksdb_allow_to_start_after_corruption' is a read only variable diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result index ede02afcb60..9af4f730a21 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result @@ -1,7 +1,85 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); SET @start_global_value = @@global.ROCKSDB_BYTES_PER_SYNC; SELECT @start_global_value; @start_global_value 0 -"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_BYTES_PER_SYNC = 444; -ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a read only variable +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 100" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 100; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +100 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 1" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 1; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 0" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 0; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@session.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_BYTES_PER_SYNC = 444; +ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'aaa'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'bbb'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '-1'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '-1'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '101'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '101'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '484436'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '484436'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +SET @@global.ROCKSDB_BYTES_PER_SYNC = @start_global_value; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result deleted file mode 100644 index 165f3811f84..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result +++ /dev/null @@ -1,58 +0,0 @@ -drop table if exists t1; -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE 
TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` int(11) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); -SELECT * FROM t1; -a b -1 1 -2 2 -3 3 -set session rocksdb_flush_memtable_on_analyze=off; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW INDEXES FROM t1; -Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE -set session rocksdb_flush_memtable_on_analyze=on; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW INDEXES FROM t1; -Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE -DROP TABLE t1; -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` int(11) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); -SELECT * FROM t1; -a b -1 1 -2 2 -3 3 -SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # 69 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # 
24 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL -DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result new file mode 100644 index 00000000000..621213cd79b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result @@ -0,0 +1,14 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(1024); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +SET @start_global_value = @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS; +SELECT @start_global_value; +@start_global_value +1 +"Trying to set variable @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS to 444. It should fail because it is readonly." +SET @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS = 444; +ERROR HY000: Variable 'rocksdb_ignore_unknown_options' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result index b058ebf05f8..60f505310c6 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result @@ -1,7 +1,3 @@ -SET @start_global_value = @@global.ROCKSDB_MAX_OPEN_FILES; -SELECT @start_global_value; -@start_global_value --1 -"Trying to set variable @@global.ROCKSDB_MAX_OPEN_FILES to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_MAX_OPEN_FILES = 444; -ERROR HY000: Variable 'rocksdb_max_open_files' is a read only variable +show variables like 'rocksdb_max_open_files'; +Variable_name Value +rocksdb_max_open_files # diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result index e417e4d5c4e..c925a68d4ed 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result @@ -6,11 +6,11 @@ INSERT INTO invalid_values VALUES('\'aaa\''); SET @start_global_value = @@global.ROCKSDB_MAX_ROW_LOCKS; SELECT @start_global_value; @start_global_value -1073741824 +1048576 SET @start_session_value = @@session.ROCKSDB_MAX_ROW_LOCKS; SELECT @start_session_value; @start_session_value -1073741824 +1048576 '# Setting to valid values in global scope#' "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1; @@ -21,7 +21,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1024" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1024; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@ -31,7 +31,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 '# Setting to valid values in session scope#' "Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1" SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1; @@ -42,7 +42,7 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 "Trying to set variable 
@@session.ROCKSDB_MAX_ROW_LOCKS to 1024" SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1024; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@ -52,21 +52,21 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 '# Testing with invalid values in global scope #' "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 'aaa'" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 'aaa'; Got one of the listed errors SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 SET @@global.ROCKSDB_MAX_ROW_LOCKS = @start_global_value; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 SET @@session.ROCKSDB_MAX_ROW_LOCKS = @start_session_value; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result similarity index 54% rename from storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result rename to storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result index 11d4f2363f6..5a19016bf91 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result @@ -3,12 +3,12 @@ INSERT INTO valid_values VALUES(1); INSERT INTO valid_values VALUES(1024); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); -SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE; +SET @start_global_value = @@global.ROCKSDB_TWO_WRITE_QUEUES; SELECT @start_global_value; @start_global_value 1 -"Trying to set variable 
@@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly." -SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444; -ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable +"Trying to set variable @@global.ROCKSDB_TWO_WRITE_QUEUES to 444. It should fail because it is readonly." +SET @@global.ROCKSDB_TWO_WRITE_QUEUES = 444; +ERROR HY000: Variable 'rocksdb_two_write_queues' is a read only variable DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result new file mode 100644 index 00000000000..126b4cffe8b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result @@ -0,0 +1,38 @@ +CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options +update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=""; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options 
update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options update_cf2={write_buffer_size=8m;target_file_size_base=2m}; +DROP TABLE t1; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result index 5ad5394db29..ba24fafd0ec 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result @@ -32,10 +32,19 @@ SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL -SET @@global.rocksdb_update_cf_options = 'aaaaa'; +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL +SET @@global.rocksdb_update_cf_options = ''; +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options + +SET @@global.rocksdb_update_cf_options = 'aaaaa';; +ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'aaaaa' +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options + SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'; CF_NAME OPTION_TYPE VALUE default WRITE_BUFFER_SIZE 
67108864 @@ -100,7 +109,12 @@ cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8}; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'; CF_NAME OPTION_TYPE VALUE cf1 TARGET_FILE_SIZE_BASE 25165824 -SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';; +ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'default={foo=bar};' +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options +cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8}; +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result index 7da628b73fd..f432f1f7750 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result @@ -1,7 +1,85 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); SET @start_global_value = @@global.ROCKSDB_WAL_BYTES_PER_SYNC; SELECT @start_global_value; @start_global_value 0 -"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 444; -ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a read only variable +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 100" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +100 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 1" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 1; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 0" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 0; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@session.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444; +ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'aaa'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'bbb'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '-1'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '-1'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '101'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '101'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '484436'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '484436'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = @start_global_value; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test new file mode 100644 index 00000000000..64fb2458424 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test @@ -0,0 +1,6 @@ +--source 
include/have_rocksdb.inc + +--let $sys_var=ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test index fbe8039277e..91da5b52d12 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test @@ -1,7 +1,22 @@ --source include/have_rocksdb.inc +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); + --let $sys_var=ROCKSDB_BYTES_PER_SYNC ---let $read_only=1 +--let $read_only=0 --let $session=0 --source ../include/rocksdb_sys_var.inc +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test deleted file mode 100644 index c7e04f89498..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test +++ /dev/null @@ -1,46 +0,0 @@ ---source include/have_rocksdb.inc - ---disable_warnings -drop table if exists t1; ---enable_warnings - -## -## test cardinality for analyze statements after flushing table -## - -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); 
-INSERT INTO t1 (b) VALUES (3); ---sorted_result -SELECT * FROM t1; - -set session rocksdb_flush_memtable_on_analyze=off; -ANALYZE TABLE t1; -SHOW INDEXES FROM t1; - -set session rocksdb_flush_memtable_on_analyze=on; -ANALYZE TABLE t1; -SHOW INDEXES FROM t1; -DROP TABLE t1; - -## -## test data length for show table status statements for tables with few rows -## - -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); ---sorted_result -SELECT * FROM t1; - ---replace_column 5 # 6 # -SHOW TABLE STATUS LIKE 't1'; -ANALYZE TABLE t1; ---replace_column 5 # 6 # -SHOW TABLE STATUS LIKE 't1'; - -DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test new file mode 100644 index 00000000000..f10ff2c6123 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test @@ -0,0 +1,16 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(1024); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); + +--let $sys_var=ROCKSDB_IGNORE_UNKNOWN_OPTIONS +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test index 47155b15137..36996761507 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test @@ -1,6 +1,8 @@ 
--source include/have_rocksdb.inc ---let $sys_var=ROCKSDB_MAX_OPEN_FILES ---let $read_only=1 ---let $session=0 ---source ../include/rocksdb_sys_var.inc +# We can not use rocksdb_sys_var.inc here as this is a global, read only option +# whose value is dependent on the servers open_files_limit. It is more fully +# tested in the rocksdb.max_open_files test. + +--replace_column 2 # +show variables like 'rocksdb_max_open_files'; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test similarity index 90% rename from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test rename to storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test index 451653fe769..43579faba82 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test @@ -7,7 +7,7 @@ INSERT INTO valid_values VALUES(1024); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); ---let $sys_var=ROCKSDB_CONCURRENT_PREPARE +--let $sys_var=ROCKSDB_TWO_WRITE_QUEUES --let $read_only=1 --let $session=0 --source ../include/rocksdb_sys_var.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test new file mode 100644 index 00000000000..03626260cab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test @@ -0,0 +1,22 @@ +--source include/have_rocksdb.inc + +CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SELECT @@global.rocksdb_update_cf_options; +SET 
@@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=""; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +DROP TABLE t1; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test index 15d5d870ae6..382338a2d40 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test @@ -38,8 +38,17 @@ SELECT @@global.rocksdb_update_cf_options; SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; +# Make sure that we do not double free the NULL string +SET @@global.rocksdb_update_cf_options = NULL; +SELECT @@global.rocksdb_update_cf_options; + +# Attempt setting an empty string +SET @@global.rocksdb_update_cf_options = ''; +SELECT @@global.rocksdb_update_cf_options; + # Will fail to parse. Value not updated. 
-SET @@global.rocksdb_update_cf_options = 'aaaaa'; +--Error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.rocksdb_update_cf_options = 'aaaaa'; SELECT @@global.rocksdb_update_cf_options; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'; @@ -86,9 +95,13 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL # Will fail to parse. No valid assignments included. Value not updated and # reset to NULL. -SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +--Error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +SELECT @@global.rocksdb_update_cf_options; + +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; USE test; -DROP TABLE t1; \ No newline at end of file +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test index 5296a37489f..5051c12e753 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test @@ -1,6 +1,22 @@ --source include/have_rocksdb.inc +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); + --let $sys_var=ROCKSDB_WAL_BYTES_PER_SYNC ---let $read_only=1 +--let $read_only=0 --let $session=0 --source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git 
a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc index 9b7fb488ffc..a74c97c61d8 100644 --- a/storage/rocksdb/properties_collector.cc +++ b/storage/rocksdb/properties_collector.cc @@ -48,17 +48,9 @@ Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, : m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr), m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l), m_file_size(0), m_params(params), - m_table_stats_sampling_pct(table_stats_sampling_pct), - m_seed(time(nullptr)), m_card_adj_extra(1.) { + m_cardinality_collector(table_stats_sampling_pct) { DBUG_ASSERT(ddl_manager != nullptr); - // We need to adjust the index cardinality numbers based on the sampling - // rate so that the output of "SHOW INDEX" command will reflect reality - // more closely. It will still be an approximation, just a better one. - if (m_table_stats_sampling_pct > 0) { - m_card_adj_extra = 100. / m_table_stats_sampling_pct; - } - m_deleted_rows_window.resize(m_params.m_window, false); } @@ -141,7 +133,7 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) { m_last_stats->m_name = m_keydef->get_name(); } } - m_last_key.clear(); + m_cardinality_collector.Reset(); } return m_last_stats; @@ -151,7 +143,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, const rocksdb::Slice &value, const rocksdb::EntryType &type, const uint64_t &file_size) { - const auto stats = AccessStats(key); + auto stats = AccessStats(key); stats->m_data_size += key.size() + value.size(); @@ -177,38 +169,15 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, sql_print_error("RocksDB: Unexpected entry type found: %u. 
" "This should not happen so aborting the system.", type); - abort_with_stack_traces(); + abort(); break; } stats->m_actual_disk_size += file_size - m_file_size; m_file_size = file_size; - if (m_keydef != nullptr && ShouldCollectStats()) { - std::size_t column = 0; - bool new_key = true; - - if (!m_last_key.empty()) { - rocksdb::Slice last(m_last_key.data(), m_last_key.size()); - new_key = (m_keydef->compare_keys(&last, &key, &column) == 0); - } - - if (new_key) { - DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size()); - - for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) { - stats->m_distinct_keys_per_prefix[i]++; - } - - // assign new last_key for the next call - // however, we only need to change the last key - // if one of the first n-1 columns is different - // If the n-1 prefix is the same, no sense in storing - // the new key - if (column < stats->m_distinct_keys_per_prefix.size()) { - m_last_key.assign(key.data(), key.size()); - } - } + if (m_keydef != nullptr) { + m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats); } } @@ -255,8 +224,10 @@ Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) { rocksdb_num_sst_entry_other += num_sst_entry_other; } - properties->insert({INDEXSTATS_KEY, - Rdb_index_stats::materialize(m_stats, m_card_adj_extra)}); + for (Rdb_index_stats &stat : m_stats) { + m_cardinality_collector.AdjustStats(&stat); + } + properties->insert({INDEXSTATS_KEY, Rdb_index_stats::materialize(m_stats)}); return rocksdb::Status::OK(); } @@ -266,23 +237,6 @@ bool Rdb_tbl_prop_coll::NeedCompact() const { (m_max_deleted_rows > m_params.m_deletes); } -bool Rdb_tbl_prop_coll::ShouldCollectStats() { - // Zero means that we'll use all the keys to update statistics. 
- if (!m_table_stats_sampling_pct || - RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct) { - return true; - } - - const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX - - RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + - RDB_TBL_STATS_SAMPLE_PCT_MIN; - - DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN); - DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX); - - return val <= m_table_stats_sampling_pct; -} - /* Returns the same as above, but in human-readable way for logging */ @@ -359,8 +313,7 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props( Serializes an array of Rdb_index_stats into a network string. */ std::string -Rdb_index_stats::materialize(const std::vector &stats, - const float card_adj_extra) { +Rdb_index_stats::materialize(const std::vector &stats) { String ret; rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES); for (const auto &i : stats) { @@ -376,8 +329,7 @@ Rdb_index_stats::materialize(const std::vector &stats, rdb_netstr_append_uint64(&ret, i.m_entry_merges); rdb_netstr_append_uint64(&ret, i.m_entry_others); for (const auto &num_keys : i.m_distinct_keys_per_prefix) { - const float upd_num_keys = num_keys * card_adj_extra; - rdb_netstr_append_uint64(&ret, static_cast(upd_num_keys)); + rdb_netstr_append_uint64(&ret, num_keys); } } @@ -410,7 +362,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s, sql_print_error("Index stats version %d was outside of supported range. " "This should not happen so aborting the system.", version); - abort_with_stack_traces(); + abort(); } size_t needed = sizeof(stats.m_gl_index_id.cf_id) + @@ -515,4 +467,75 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment, } } +Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct) + : m_table_stats_sampling_pct(table_stats_sampling_pct), + m_seed(time(nullptr)) {} + +bool Rdb_tbl_card_coll::IsSampingDisabled() { + // Zero means that we'll use all the keys to update statistics. 
+ return m_table_stats_sampling_pct == 0 || + RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct; +} + +bool Rdb_tbl_card_coll::ShouldCollectStats() { + if (IsSampingDisabled()) { + return true; // collect every key + } + + const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX - + RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + + RDB_TBL_STATS_SAMPLE_PCT_MIN; + + DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN); + DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX); + + return val <= m_table_stats_sampling_pct; +} + +void Rdb_tbl_card_coll::ProcessKey(const rocksdb::Slice &key, + const Rdb_key_def *keydef, + Rdb_index_stats *stats) { + if (ShouldCollectStats()) { + std::size_t column = 0; + bool new_key = true; + + if (!m_last_key.empty()) { + rocksdb::Slice last(m_last_key.data(), m_last_key.size()); + new_key = (keydef->compare_keys(&last, &key, &column) == 0); + } + + if (new_key) { + DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size()); + + for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) { + stats->m_distinct_keys_per_prefix[i]++; + } + + // assign new last_key for the next call + // however, we only need to change the last key + // if one of the first n-1 columns is different + // If the n-1 prefix is the same, no sense in storing + // the new key + if (column < stats->m_distinct_keys_per_prefix.size()) { + m_last_key.assign(key.data(), key.size()); + } + } + } +} + +void Rdb_tbl_card_coll::Reset() { m_last_key.clear(); } + +// We need to adjust the index cardinality numbers based on the sampling +// rate so that the output of "SHOW INDEX" command will reflect reality +// more closely. It will still be an approximation, just a better one. 
+void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) { + if (IsSampingDisabled()) { + // no sampling was done, return as stats is + return; + } + for (int64_t &num_keys : stats->m_distinct_keys_per_prefix) { + num_keys = num_keys * 100 / m_table_stats_sampling_pct; + } +} + } // namespace myrocks diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h index 9ae519d95c7..1441d893420 100644 --- a/storage/rocksdb/properties_collector.h +++ b/storage/rocksdb/properties_collector.h @@ -56,8 +56,7 @@ struct Rdb_index_stats { std::vector m_distinct_keys_per_prefix; std::string m_name; // name is not persisted - static std::string materialize(const std::vector &stats, - const float card_adj_extra); + static std::string materialize(const std::vector &stats); static int unmaterialize(const std::string &s, std::vector *const ret); @@ -71,6 +70,40 @@ struct Rdb_index_stats { const int64_t &estimated_data_len = 0); }; +// The helper class to calculate index cardinality +class Rdb_tbl_card_coll { + public: + explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct); + + public: + void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef, + Rdb_index_stats *stats); + /* + * Resets the state of the collector to start calculating statistics for a + * next index. + */ + void Reset(); + + /* + * Cardinality statistics might be calculated using some sampling strategy. + * This method adjusts gathered statistics according to the sampling + * strategy used. Note that adjusted cardinality value is just an estimate + * and can return a value exeeding number of rows in a table, so the + * returned value should be capped by row count before using it by + * an optrimizer or displaying it to a clent. 
+ */ + void AdjustStats(Rdb_index_stats *stats); + + private: + bool ShouldCollectStats(); + bool IsSampingDisabled(); + + private: + std::string m_last_key; + uint8_t m_table_stats_sampling_pct; + unsigned int m_seed; +}; + class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector { public: Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, @@ -130,9 +163,7 @@ private: uint64_t m_rows, m_window_pos, m_deleted_rows, m_max_deleted_rows; uint64_t m_file_size; Rdb_compact_params m_params; - uint8_t m_table_stats_sampling_pct; - unsigned int m_seed; - float m_card_adj_extra; + Rdb_tbl_card_coll m_cardinality_collector; }; class Rdb_tbl_prop_coll_factory diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h index 3ba0ca99259..d3d75806a5e 100644 --- a/storage/rocksdb/rdb_buff.h +++ b/storage/rocksdb/rdb_buff.h @@ -361,7 +361,7 @@ public: } void truncate(const size_t &pos) { - DBUG_ASSERT(0 <= pos && pos < m_data.size()); + DBUG_ASSERT(pos < m_data.size()); m_data.resize(pos); } diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc index 8f183882277..a63d8d2ab86 100644 --- a/storage/rocksdb/rdb_cf_options.cc +++ b/storage/rocksdb/rdb_cf_options.cc @@ -323,6 +323,13 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) { } } +std::shared_ptr +Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) { + return (cf_name == DEFAULT_SYSTEM_CF_NAME) + ? 
std::make_shared() + : nullptr; +} + void Rdb_cf_options::get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) { DBUG_ASSERT(opts != nullptr); @@ -332,6 +339,7 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name, // Set the comparator according to 'rev:' opts->comparator = get_cf_comparator(cf_name); + opts->merge_operator = get_cf_merge_operator(cf_name); } } // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h index 32f2308284f..19e5da6a79e 100644 --- a/storage/rocksdb/rdb_cf_options.h +++ b/storage/rocksdb/rdb_cf_options.h @@ -64,6 +64,9 @@ public: static const rocksdb::Comparator * get_cf_comparator(const std::string &cf_name); + std::shared_ptr + get_cf_merge_operator(const std::string &cf_name); + void get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) MY_ATTRIBUTE((__nonnull__)); diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h index c457fba4af2..4b1c8c73224 100644 --- a/storage/rocksdb/rdb_compact_filter.h +++ b/storage/rocksdb/rdb_compact_filter.h @@ -166,7 +166,7 @@ public: sql_print_error("Decoding ttl from PK value failed in compaction filter, " "for index (%u,%u), val: %s", m_prev_index.cf_id, m_prev_index.index_id, buf.c_str()); - abort_with_stack_traces(); + abort(); } /* diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index 1694bee7d76..1e5a61f75f0 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -805,6 +806,25 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) { return changed; } +/* + @return Number of bytes that were changed +*/ +int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) { + DBUG_ASSERT(packed_tuple != nullptr); + + int changed = 0; + uchar *p = packed_tuple + len - 1; + for (; p > 
packed_tuple; p--) { + changed++; + if (*p != uchar(0x00)) { + *p = *p - 1; + break; + } + *p = 0xFF; + } + return changed; +} + static const std::map UNPACK_HEADER_SIZES = { {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE}, {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}}; @@ -1403,11 +1423,11 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, MY_BITMAP covered_bitmap; my_bitmap_map covered_bits; uint curr_bitmap_pos = 0; - bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false); const bool has_covered_bitmap = has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG); if (has_covered_bitmap) { + bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false); covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header + sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE); @@ -1481,6 +1501,18 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, } if ((this->*fpi->m_skip_func)(fpi, field, &reader)) return HA_ERR_ROCKSDB_CORRUPT_DATA; + + // If this is a space padded varchar, we need to skip the indicator + // bytes for trailing bytes. They're useless since we can't restore the + // field anyway. + // + // There is a special case for prefixed varchars where we do not + // generate unpack info, because we know prefixed varchars cannot be + // unpacked. In this case, it is not necessary to skip. + if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad && + !fpi->m_unpack_info_stores_value) { + unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 
2 : 1); + } } } @@ -3459,6 +3491,20 @@ void Rdb_tbl_def::set_name(const std::string &name) { check_if_is_mysql_system_table(); } +GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() { + for (uint i = 0; i < m_key_count; i++) { + auto &k = m_key_descr_arr[i]; + if (k->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY || + k->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY) { + return k->get_gl_index_id(); + } + } + + // Every table must have a primary key, even if it's hidden. + abort(); + return GL_INDEX_ID(); +} + /* Static function of type my_hash_get_key that gets invoked by the m_ddl_hash object of type my_core::HASH. @@ -3683,6 +3729,68 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir, return result; } +/* + Validate that all auto increment values in the data dictionary are on a + supported version. +*/ +bool Rdb_ddl_manager::validate_auto_incr() { + std::unique_ptr it(m_dict->new_iterator()); + + uchar auto_incr_entry[Rdb_key_def::INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(auto_incr_entry, Rdb_key_def::AUTO_INC); + const rocksdb::Slice auto_incr_entry_slice( + reinterpret_cast(auto_incr_entry), + Rdb_key_def::INDEX_NUMBER_SIZE); + for (it->Seek(auto_incr_entry_slice); it->Valid(); it->Next()) { + const rocksdb::Slice key = it->key(); + const rocksdb::Slice val = it->value(); + GL_INDEX_ID gl_index_id; + + if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE && + memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) + break; + + if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3) { + return false; + } + + if (val.size() <= Rdb_key_def::VERSION_SIZE) { + return false; + } + + // Check if we have orphaned entries for whatever reason by cross + // referencing ddl entries. 
+ auto ptr = reinterpret_cast(key.data()); + ptr += Rdb_key_def::INDEX_NUMBER_SIZE; + rdb_netbuf_read_gl_index(&ptr, &gl_index_id); + if (!m_dict->get_index_info(gl_index_id, nullptr)) { + // NO_LINT_DEBUG + sql_print_warning("RocksDB: AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "but does not exist as a DDL entry", + gl_index_id.cf_id, gl_index_id.index_id); + return false; + } + + ptr = reinterpret_cast(val.data()); + const int version = rdb_netbuf_read_uint16(&ptr); + if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) { + // NO_LINT_DEBUG + sql_print_warning("RocksDB: AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "is on unsupported version %d", + gl_index_id.cf_id, gl_index_id.index_id, version); + return false; + } + } + + if (!it->status().ok()) { + return false; + } + + return true; +} + /* Validate that all the tables in the RocksDB database dictionary match the .frm files in the datadir @@ -3847,10 +3955,18 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, If validate_tables is greater than 0 run the validation. Only fail the initialzation if the setting is 1. If the setting is 2 we continue. 
*/ - if (validate_tables > 0 && !validate_schemas()) { - if (validate_tables == 1) { - sql_print_error("RocksDB: Problems validating data dictionary " - "against .frm files, exiting"); + if (validate_tables > 0) { + std::string msg; + if (!validate_schemas()) { + msg = "RocksDB: Problems validating data dictionary " + "against .frm files, exiting"; + } else if (!validate_auto_incr()) { + msg = "RocksDB: Problems validating auto increment values in " + "data dictionary, exiting"; + } + if (validate_tables == 1 && !msg.empty()) { + // NO_LINT_DEBUG + sql_print_error("%s", msg.c_str()); return true; } } @@ -4124,6 +4240,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, new_rec->m_auto_incr_val = rec->m_auto_incr_val.load(std::memory_order_relaxed); new_rec->m_key_descr_arr = rec->m_key_descr_arr; + // so that it's not free'd when deleting the old rec rec->m_key_descr_arr = nullptr; @@ -4555,13 +4672,16 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch, const GL_INDEX_ID &gl_index_id) const { delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id); delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id); + delete_with_prefix(batch, Rdb_key_def::AUTO_INC, gl_index_id); } bool Rdb_dict_manager::get_index_info( const GL_INDEX_ID &gl_index_id, struct Rdb_index_info *const index_info) const { - index_info->m_gl_index_id = gl_index_id; + if (index_info) { + index_info->m_gl_index_id = gl_index_id; + } bool found = false; bool error = false; @@ -4572,6 +4692,10 @@ bool Rdb_dict_manager::get_index_info( const rocksdb::Status &status = get_value(key, &value); if (status.ok()) { + if (!index_info) { + return true; + } + const uchar *const val = (const uchar *)value.c_str(); const uchar *ptr = val; index_info->m_index_dict_version = rdb_netbuf_to_uint16(val); @@ -4610,6 +4734,11 @@ bool Rdb_dict_manager::get_index_info( index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); ptr += RDB_SIZEOF_KV_VERSION; 
index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); + if ((index_info->m_kv_version == + Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) && + index_info->m_ttl_duration > 0) { + index_info->m_index_flags = Rdb_key_def::TTL_FLAG; + } found = true; break; @@ -4651,7 +4780,7 @@ bool Rdb_dict_manager::get_index_info( "and it may be a bug.", index_info->m_index_dict_version, index_info->m_index_type, index_info->m_kv_version, index_info->m_ttl_duration); - abort_with_stack_traces(); + abort(); } return found; @@ -4914,7 +5043,7 @@ void Rdb_dict_manager::resume_drop_indexes() const { "bug.", max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id); - abort_with_stack_traces(); + abort(); } } } @@ -4963,7 +5092,7 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id, "from index id (%u,%u). MyRocks data dictionary may " "get corrupted.", gl_index_id.cf_id, gl_index_id.index_id); - abort_with_stack_traces(); + abort(); } } } @@ -5021,7 +5150,7 @@ void Rdb_dict_manager::add_stats( // IndexStats::materialize takes complete care of serialization including // storing the version const auto value = - Rdb_index_stats::materialize(std::vector{it}, 1.); + Rdb_index_stats::materialize(std::vector{it}); batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)), value); @@ -5047,6 +5176,53 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const { return Rdb_index_stats(); } +rocksdb::Status +Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch, + const GL_INDEX_ID &gl_index_id, + ulonglong val, bool overwrite) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); + const rocksdb::Slice key = + rocksdb::Slice(reinterpret_cast(key_buf), sizeof(key_buf)); + + // Value is constructed by storing the version and the value. 
+ uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0}; + uchar *ptr = value_buf; + rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION); + ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION; + rdb_netbuf_store_uint64(ptr, val); + ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE; + const rocksdb::Slice value = + rocksdb::Slice(reinterpret_cast(value_buf), ptr - value_buf); + + if (overwrite) { + return batch->Put(m_system_cfh, key, value); + } + return batch->Merge(m_system_cfh, key, value); +} + +bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); + + std::string value; + const rocksdb::Status status = get_value( + rocksdb::Slice(reinterpret_cast(key_buf), sizeof(key_buf)), + &value); + + if (status.ok()) { + const uchar *const val = reinterpret_cast(value.data()); + + if (rdb_netbuf_to_uint16(val) <= Rdb_key_def::AUTO_INCREMENT_VERSION) { + *new_val = rdb_netbuf_to_uint64(val + RDB_SIZEOF_AUTO_INCREMENT_VERSION); + return true; + } + } + return false; +} + uint Rdb_seq_generator::get_and_update_next_number( Rdb_dict_manager *const dict) { DBUG_ASSERT(dict != nullptr); diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 987647f13c6..0a7d5e0f0a8 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -132,6 +132,7 @@ const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16); const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar); const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16); const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32); +const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16); // Possible return values for rdb_index_field_unpack_t functions. enum { @@ -231,17 +232,28 @@ public: *size = INDEX_NUMBER_SIZE; } + /* Get the first key that you need to position at to start iterating. 
+ Returns a "supremum" or "infimum" for this index based on collation order + */ + inline void get_first_key(uchar *const key, uint *const size) const { + return m_is_reverse_cf ? get_supremum_key(key, size) + : get_infimum_key(key, size); + } + /* Make a key that is right after the given key. */ static int successor(uchar *const packed_tuple, const uint &len); + /* Make a key that is right before the given key. */ + static int predecessor(uchar *const packed_tuple, const uint &len); + /* This can be used to compare prefixes. if X is a prefix of Y, then we consider that X = Y. */ // b describes the lookup key, which can be a prefix of a. + // b might be outside of the index_number range, if successor() is called. int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const { DBUG_ASSERT(covers_key(a)); - DBUG_ASSERT(covers_key(b)); return memcmp(a.data(), b.data(), std::min(a.size(), b.size())); } @@ -377,6 +389,7 @@ public: INDEX_STATISTICS = 6, MAX_INDEX_ID = 7, DDL_CREATE_INDEX_ONGOING = 8, + AUTO_INC = 9, END_DICT_INDEX_ID = 255 }; @@ -389,6 +402,7 @@ public: DDL_DROP_INDEX_ONGOING_VERSION = 1, MAX_INDEX_ID_VERSION = 1, DDL_CREATE_INDEX_ONGOING_VERSION = 1, + AUTO_INCREMENT_VERSION = 1, // Version for index stats is stored in IndexStats struct }; @@ -962,17 +976,17 @@ public: Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete; explicit Rdb_tbl_def(const std::string &name) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(name); } Rdb_tbl_def(const char *const name, const size_t &len) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(name, len)); } explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), 
m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(slice.data() + pos, slice.size() - pos)); } @@ -985,7 +999,7 @@ public: std::shared_ptr *m_key_descr_arr; std::atomic m_hidden_pk_val; - std::atomic m_auto_incr_val; + std::atomic m_auto_incr_val; /* Is this a system table */ bool m_is_mysql_system_table; @@ -997,6 +1011,7 @@ public: const std::string &base_dbname() const { return m_dbname; } const std::string &base_tablename() const { return m_tablename; } const std::string &base_partition() const { return m_partition; } + GL_INDEX_ID get_autoincr_gl_index_id(); }; /* @@ -1108,6 +1123,8 @@ private: static void free_hash_elem(void *const data); bool validate_schemas(); + + bool validate_auto_incr(); }; /* @@ -1172,8 +1189,9 @@ private: 2. internal cf_id, index id => index information key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id - value: version, index_type, kv_format_version, ttl_duration + value: version, index_type, kv_format_version, index_flags, ttl_duration index_type is 1 byte, version and kv_format_version are 2 bytes. + index_flags is 4 bytes. ttl_duration is 8 bytes. 3. CF id => CF flags @@ -1202,6 +1220,11 @@ private: key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id value: version + 9. auto_increment values + key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id + value: version, {max auto_increment so far} + max auto_increment is 8 bytes + Data dictionary operations are atomic inside RocksDB. For example, when creating a table with two indexes, it is necessary to call Put three times. They have to be atomic. 
Rdb_dict_manager has a wrapper function @@ -1343,6 +1366,13 @@ public: void add_stats(rocksdb::WriteBatch *const batch, const std::vector &stats) const; Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const; + + rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch, + const GL_INDEX_ID &gl_index_id, + ulonglong val, + bool overwrite = false) const; + bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const; }; struct Rdb_index_info { @@ -1354,4 +1384,107 @@ struct Rdb_index_info { uint64 m_ttl_duration = 0; }; +/* + @brief + Merge Operator for the auto_increment value in the system_cf + + @detail + This class implements the rocksdb Merge Operator for auto_increment values + that are stored to the data dictionary every transaction. + + The actual Merge function is triggered on compaction, memtable flushes, or + when get() is called on the same key. + + */ +class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator { + public: + /* + Updates the new value associated with a key to be the maximum of the + passed in value and the existing value. 
+ + @param[IN] key + @param[IN] existing_value existing value for a key; nullptr if nonexistent + key + @param[IN] value + @param[OUT] new_value new value after Merge + @param[IN] logger + */ + bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value, + const rocksdb::Slice &value, std::string *new_value, + rocksdb::Logger *logger) const override { + DBUG_ASSERT(new_value != nullptr); + + if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 || + GetKeyType(key) != Rdb_key_def::AUTO_INC || + value.size() != + RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE || + GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) { + abort(); + } + + uint64_t merged_value = Deserialize(value); + + if (existing_value != nullptr) { + if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE || + GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) { + abort(); + } + + merged_value = std::max(merged_value, Deserialize(*existing_value)); + } + Serialize(merged_value, new_value); + return true; + } + + virtual const char *Name() const override { return "Rdb_system_merge_op"; } + + private: + /* + Serializes the integer data to the new_value buffer or the target buffer + the merge operator will update to + */ + void Serialize(const uint64_t data, std::string *new_value) const { + uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0}; + uchar *ptr = value_buf; + /* fill in the auto increment version */ + rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION); + ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION; + /* fill in the auto increment value */ + rdb_netbuf_store_uint64(ptr, data); + ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE; + new_value->assign(reinterpret_cast(value_buf), ptr - value_buf); + } + + /* + Gets the value of auto_increment type in the data dictionary from the + value slice + + @Note Only to be used on data dictionary keys for the auto_increment 
type + */ + uint64_t Deserialize(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint64(reinterpret_cast(s.data()) + + RDB_SIZEOF_AUTO_INCREMENT_VERSION); + } + + /* + Gets the type of the key of the key in the data dictionary. + + @Note Only to be used on data dictionary keys for the auto_increment type + */ + uint16_t GetKeyType(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint32(reinterpret_cast(s.data())); + } + + /* + Gets the version of the auto_increment value in the data dictionary. + + @Note Only to be used on data dictionary value for the auto_increment type + */ + uint16_t GetVersion(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint16(reinterpret_cast(s.data())); + } +}; + } // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index 15e4393b41e..8c097a5649f 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -782,7 +782,7 @@ static int rdb_i_s_global_info_fill_table( "from CF with id = %u. 
MyRocks data dictionary may " "be corrupted.", cf_handle->GetID()); - abort_with_stack_traces(); + abort(); } snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID()); @@ -913,7 +913,10 @@ enum { INDEX_NUMBER, INDEX_TYPE, KV_FORMAT_VERSION, - CF + TTL_DURATION, + INDEX_FLAGS, + CF, + AUTO_INCREMENT }; } // namespace RDB_DDL_FIELD @@ -928,7 +931,11 @@ static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = { ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), + ROCKSDB_FIELD_INFO("TTL_DURATION", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("INDEX_FLAGS", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0), ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("AUTO_INCREMENT", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, + MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), ROCKSDB_FIELD_INFO_END}; int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { @@ -939,6 +946,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { DBUG_ASSERT(m_table != nullptr); Field **field = m_table->field; DBUG_ASSERT(field != nullptr); + const Rdb_dict_manager *dict_manager = rdb_get_dict_manager(); const std::string &dbname = tdef->base_dbname(); field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(), @@ -969,10 +977,20 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true); field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version, true); + field[RDB_DDL_FIELD::TTL_DURATION]->store(kd.m_ttl_duration, true); + field[RDB_DDL_FIELD::INDEX_FLAGS]->store(kd.m_index_flags_bitmap, true); std::string cf_name = kd.get_cf()->GetName(); field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(), system_charset_info); + ulonglong auto_incr; + if (dict_manager->get_auto_incr_val(tdef->get_autoincr_gl_index_id(), + &auto_incr)) { + field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_notnull(); + 
field[RDB_DDL_FIELD::AUTO_INCREMENT]->store(auto_incr, true); + } else { + field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_null(); + } ret = my_core::schema_table_store_record(m_thd, m_table); if (ret) @@ -1458,6 +1476,117 @@ static int rdb_i_s_trx_info_init(void *const p) { DBUG_RETURN(0); } +/* + Support for INFORMATION_SCHEMA.ROCKSDB_DEADLOCK dynamic table + */ +namespace RDB_DEADLOCK_FIELD { +enum { + DEADLOCK_ID = 0, + TRANSACTION_ID, + CF_NAME, + WAITING_KEY, + LOCK_TYPE, + INDEX_NAME, + TABLE_NAME, + ROLLED_BACK +}; +} // namespace RDB_TRX_FIELD + +static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = { + ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("LOCK_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("ROLLED_BACK", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO_END}; + +/* Fill the information_schema.rocksdb_trx virtual table */ +static int rdb_i_s_deadlock_info_fill_table( + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(tables != nullptr); + DBUG_ASSERT(tables->table != nullptr); + DBUG_ASSERT(tables->table->field != nullptr); + + static const std::string str_exclusive("EXCLUSIVE"); + static const std::string str_shared("SHARED"); + + int ret = 0; + rocksdb::DB *const rdb = rdb_get_rocksdb_db(); + + if (!rdb) { + DBUG_RETURN(ret); + } + + const std::vector &all_dl_info = rdb_get_deadlock_info(); + + ulonglong id = 0; + for (const auto &info : all_dl_info) { + for 
(const auto &trx_info : info.path) { + tables->table->field[RDB_DEADLOCK_FIELD::DEADLOCK_ID]->store(id, true); + tables->table->field[RDB_DEADLOCK_FIELD::TRANSACTION_ID]->store( + trx_info.trx_id, true); + tables->table->field[RDB_DEADLOCK_FIELD::CF_NAME]->store( + trx_info.cf_name.c_str(), trx_info.cf_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::WAITING_KEY]->store( + trx_info.waiting_key.c_str(), trx_info.waiting_key.length(), + system_charset_info); + if (trx_info.exclusive_lock) { + tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store( + str_exclusive.c_str(), str_exclusive.length(), system_charset_info); + } else { + tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store( + str_shared.c_str(), str_shared.length(), system_charset_info); + } + tables->table->field[RDB_DEADLOCK_FIELD::INDEX_NAME]->store( + trx_info.index_name.c_str(), trx_info.index_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::TABLE_NAME]->store( + trx_info.table_name.c_str(), trx_info.table_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::ROLLED_BACK]->store( + trx_info.trx_id == info.victim_trx_id, true); + + /* Tell MySQL about this row in the virtual table */ + ret = static_cast( + my_core::schema_table_store_record(thd, tables->table)); + + if (ret != 0) { + break; + } + } + id++; + } + + DBUG_RETURN(ret); +} + +/* Initialize the information_schema.rocksdb_trx_info virtual table */ +static int rdb_i_s_deadlock_info_init(void *const p) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(p != nullptr); + + my_core::ST_SCHEMA_TABLE *schema; + + schema = (my_core::ST_SCHEMA_TABLE *)p; + + schema->fields_info = rdb_i_s_deadlock_info_fields_info; + schema->fill_table = rdb_i_s_deadlock_info_fill_table; + + DBUG_RETURN(0); +} + static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) { DBUG_ENTER_FUNC(); DBUG_RETURN(0); @@ -1641,4 +1770,20 @@ struct st_mysql_plugin rdb_i_s_trx_info = { 
nullptr, /* config options */ 0, /* flags */ }; + +struct st_mysql_plugin rdb_i_s_deadlock_info = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_DEADLOCK", + "Facebook", + "RocksDB transaction information", + PLUGIN_LICENSE_GPL, + rdb_i_s_deadlock_info_init, + nullptr, + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + 0, /* flags */ +}; } // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h index c684464c996..4e72a1e1d03 100644 --- a/storage/rocksdb/rdb_i_s.h +++ b/storage/rocksdb/rdb_i_s.h @@ -32,4 +32,5 @@ extern struct st_mysql_plugin rdb_i_s_ddl; extern struct st_mysql_plugin rdb_i_s_index_file_map; extern struct st_mysql_plugin rdb_i_s_lock_info; extern struct st_mysql_plugin rdb_i_s_trx_info; +extern struct st_mysql_plugin rdb_i_s_deadlock_info; } // namespace myrocks diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc index 7b229eee47d..039ab05c2ed 100644 --- a/storage/rocksdb/rdb_io_watchdog.cc +++ b/storage/rocksdb/rdb_io_watchdog.cc @@ -42,7 +42,7 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) { "Shutting the service down.", m_write_timeout); - abort_with_stack_traces(); + abort(); } void Rdb_io_watchdog::io_check_callback(union sigval timer_data) { diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc index 91f4a7d6ae4..bf7fd199d93 100644 --- a/storage/rocksdb/rdb_perf_context.cc +++ b/storage/rocksdb/rdb_perf_context.cc @@ -44,8 +44,13 @@ std::string rdb_pc_stat_types[] = { "BLOCK_READ_TIME", "BLOCK_CHECKSUM_TIME", "BLOCK_DECOMPRESS_TIME", + "GET_READ_BYTES", + "MULTIGET_READ_BYTES", + "ITER_READ_BYTES", "INTERNAL_KEY_SKIPPED_COUNT", "INTERNAL_DELETE_SKIPPED_COUNT", + "INTERNAL_RECENT_SKIPPED_COUNT", + "INTERNAL_MERGE_COUNT", "GET_SNAPSHOT_TIME", "GET_FROM_MEMTABLE_TIME", "GET_FROM_MEMTABLE_COUNT", @@ -53,9 +58,12 @@ 
std::string rdb_pc_stat_types[] = { "GET_FROM_OUTPUT_FILES_TIME", "SEEK_ON_MEMTABLE_TIME", "SEEK_ON_MEMTABLE_COUNT", + "NEXT_ON_MEMTABLE_COUNT", + "PREV_ON_MEMTABLE_COUNT", "SEEK_CHILD_SEEK_TIME", "SEEK_CHILD_SEEK_COUNT", - "SEEK_IN_HEAP_TIME", + "SEEK_MIN_HEAP_TIME", + "SEEK_MAX_HEAP_TIME", "SEEK_INTERNAL_SEEK_TIME", "FIND_NEXT_USER_ENTRY_TIME", "WRITE_WAL_TIME", @@ -71,6 +79,12 @@ std::string rdb_pc_stat_types[] = { "NEW_TABLE_ITERATOR_NANOS", "BLOCK_SEEK_NANOS", "FIND_TABLE_NANOS", + "BLOOM_MEMTABLE_HIT_COUNT", + "BLOOM_MEMTABLE_MISS_COUNT", + "BLOOM_SST_HIT_COUNT", + "BLOOM_SST_MISS_COUNT", + "KEY_LOCK_WAIT_TIME", + "KEY_LOCK_WAIT_COUNT", "IO_THREAD_POOL_ID", "IO_BYTES_WRITTEN", "IO_BYTES_READ", @@ -104,8 +118,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { IO_PERF_RECORD(block_read_time); IO_PERF_RECORD(block_checksum_time); IO_PERF_RECORD(block_decompress_time); + IO_PERF_RECORD(get_read_bytes); + IO_PERF_RECORD(multiget_read_bytes); + IO_PERF_RECORD(iter_read_bytes); IO_PERF_RECORD(internal_key_skipped_count); IO_PERF_RECORD(internal_delete_skipped_count); + IO_PERF_RECORD(internal_recent_skipped_count); + IO_PERF_RECORD(internal_merge_count); IO_PERF_RECORD(get_snapshot_time); IO_PERF_RECORD(get_from_memtable_time); IO_PERF_RECORD(get_from_memtable_count); @@ -113,9 +132,12 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { IO_PERF_RECORD(get_from_output_files_time); IO_PERF_RECORD(seek_on_memtable_time); IO_PERF_RECORD(seek_on_memtable_count); + IO_PERF_RECORD(next_on_memtable_count); + IO_PERF_RECORD(prev_on_memtable_count); IO_PERF_RECORD(seek_child_seek_time); IO_PERF_RECORD(seek_child_seek_count); IO_PERF_RECORD(seek_min_heap_time); + IO_PERF_RECORD(seek_max_heap_time); IO_PERF_RECORD(seek_internal_seek_time); IO_PERF_RECORD(find_next_user_entry_time); IO_PERF_RECORD(write_wal_time); @@ -131,6 +153,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { 
IO_PERF_RECORD(new_table_iterator_nanos); IO_PERF_RECORD(block_seek_nanos); IO_PERF_RECORD(find_table_nanos); + IO_PERF_RECORD(bloom_memtable_hit_count); + IO_PERF_RECORD(bloom_memtable_miss_count); + IO_PERF_RECORD(bloom_sst_hit_count); + IO_PERF_RECORD(bloom_sst_miss_count); + IO_PERF_RECORD(key_lock_wait_time); + IO_PERF_RECORD(key_lock_wait_count); + IO_STAT_RECORD(thread_pool_id); IO_STAT_RECORD(bytes_written); IO_STAT_RECORD(bytes_read); diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h index ad16d95284b..c2bcf9bf9af 100644 --- a/storage/rocksdb/rdb_perf_context.h +++ b/storage/rocksdb/rdb_perf_context.h @@ -35,8 +35,13 @@ enum { PC_BLOCK_READ_TIME, PC_BLOCK_CHECKSUM_TIME, PC_BLOCK_DECOMPRESS_TIME, + PC_GET_READ_BYTES, + PC_MULTIGET_READ_BYTES, + PC_ITER_READ_BYTES, PC_KEY_SKIPPED, PC_DELETE_SKIPPED, + PC_RECENT_SKIPPED, + PC_MERGE, PC_GET_SNAPSHOT_TIME, PC_GET_FROM_MEMTABLE_TIME, PC_GET_FROM_MEMTABLE_COUNT, @@ -44,9 +49,12 @@ enum { PC_GET_FROM_OUTPUT_FILES_TIME, PC_SEEK_ON_MEMTABLE_TIME, PC_SEEK_ON_MEMTABLE_COUNT, + PC_NEXT_ON_MEMTABLE_COUNT, + PC_PREV_ON_MEMTABLE_COUNT, PC_SEEK_CHILD_SEEK_TIME, PC_SEEK_CHILD_SEEK_COUNT, PC_SEEK_MIN_HEAP_TIME, + PC_SEEK_MAX_HEAP_TIME, PC_SEEK_INTERNAL_SEEK_TIME, PC_FIND_NEXT_USER_ENTRY_TIME, PC_WRITE_WAL_TIME, @@ -62,6 +70,12 @@ enum { PC_NEW_TABLE_ITERATOR_NANOS, PC_BLOCK_SEEK_NANOS, PC_FIND_TABLE_NANOS, + PC_BLOOM_MEMTABLE_HIT_COUNT, + PC_BLOOM_MEMTABLE_MISS_COUNT, + PC_BLOOM_SST_HIT_COUNT, + PC_BLOOM_SST_MISS_COUNT, + PC_KEY_LOCK_WAIT_TIME, + PC_KEY_LOCK_WAIT_COUNT, PC_IO_THREAD_POOL_ID, PC_IO_BYTES_WRITTEN, PC_IO_BYTES_READ, diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc index b6bc89a02f9..b5309df5973 100644 --- a/storage/rocksdb/rdb_psi.cc +++ b/storage/rocksdb/rdb_psi.cc @@ -48,7 +48,7 @@ my_core::PSI_thread_info all_rocksdb_threads[] = { my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, 
rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key, key_mutex_tx_list, rdb_sysvars_psi_mutex_key, - rdb_cfm_mutex_key; + rdb_cfm_mutex_key, rdb_sst_commit_key; my_core::PSI_mutex_info all_rocksdb_mutexes[] = { {&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL}, @@ -60,6 +60,7 @@ my_core::PSI_mutex_info all_rocksdb_mutexes[] = { {&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL}, {&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL}, {&rdb_cfm_mutex_key, "column family manager", PSI_FLAG_GLOBAL}, + {&rdb_sst_commit_key, "sst commit", PSI_FLAG_GLOBAL}, }; my_core::PSI_rwlock_key key_rwlock_collation_exception_list, diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h index 0a62f411ade..d4318ee3dba 100644 --- a/storage/rocksdb/rdb_psi.h +++ b/storage/rocksdb/rdb_psi.h @@ -40,7 +40,8 @@ extern my_core::PSI_thread_key rdb_background_psi_thread_key, extern my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key, - key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key; + key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key, + rdb_sst_commit_key; extern my_core::PSI_rwlock_key key_rwlock_collation_exception_list, key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables; diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc index 9afe9807317..ae3cbdf0197 100644 --- a/storage/rocksdb/rdb_sst_info.cc +++ b/storage/rocksdb/rdb_sst_info.cc @@ -35,6 +35,7 @@ #include "./ha_rocksdb.h" #include "./ha_rocksdb_proto.h" #include "./rdb_cf_options.h" +#include "./rdb_psi.h" namespace myrocks { @@ -252,7 +253,6 @@ rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key, if (!m_first_key.empty()) { rocksdb::Slice first_key_slice(m_first_key); int cmp = m_file.compare(first_key_slice, key); - DBUG_ASSERT(cmp != 0); m_use_stack = (cmp > 0); // Apply the first key 
to the stack or SST @@ -316,11 +316,11 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, const rocksdb::DBOptions &db_options, const bool &tracing) : m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0), - m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), + m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false), #if defined(RDB_SST_INFO_USE_THREAD) m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false), #endif - m_sst_file(nullptr), m_tracing(tracing) { + m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) { m_prefix = db->GetName() + "/"; std::string normalized_table; @@ -347,6 +347,7 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, // Set the maximum size to 3 times the cf's target size m_max_size = cf_descr.options.target_file_size_base * 3; } + mysql_mutex_init(rdb_sst_commit_key, &m_commit_mutex, MY_MUTEX_INIT_FAST); } Rdb_sst_info::~Rdb_sst_info() { @@ -354,6 +355,7 @@ Rdb_sst_info::~Rdb_sst_info() { #if defined(RDB_SST_INFO_USE_THREAD) DBUG_ASSERT(m_thread == nullptr); #endif + mysql_mutex_destroy(&m_commit_mutex); } int Rdb_sst_info::open_new_sst_file() { @@ -418,6 +420,8 @@ void Rdb_sst_info::close_curr_sst_file() { int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { int rc; + DBUG_ASSERT(!m_committed); + if (m_curr_size + key.size() + value.size() >= m_max_size) { // The current sst file has reached its maximum, close it out close_curr_sst_file(); @@ -451,7 +455,21 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { return HA_EXIT_SUCCESS; } -int Rdb_sst_info::commit() { +int Rdb_sst_info::commit(bool print_client_error) { + int ret = HA_EXIT_SUCCESS; + + // Both the transaction clean up and the ha_rocksdb handler have + // references to this Rdb_sst_info and both can call commit, so + // synchronize on the object here. 
+ RDB_MUTEX_LOCK_CHECK(m_commit_mutex); + + if (m_committed) { + RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); + return ret; + } + + m_print_client_error = print_client_error; + if (m_curr_size > 0) { // Close out any existing files close_curr_sst_file(); @@ -470,16 +488,24 @@ int Rdb_sst_info::commit() { } #endif + m_committed = true; + RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); + // Did we get any errors? if (have_background_error()) { - return get_and_reset_background_error(); + ret = get_and_reset_background_error(); } - return HA_EXIT_SUCCESS; + m_print_client_error = true; + return ret; } void Rdb_sst_info::set_error_msg(const std::string &sst_file_name, const rocksdb::Status &s) { + + if (!m_print_client_error) + return; + #if defined(RDB_SST_INFO_USE_THREAD) // Both the foreground and background threads can set the error message // so lock the mutex to protect it. We only want the first error that diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h index 358b1933592..bdd7cdca5f3 100644 --- a/storage/rocksdb/rdb_sst_info.h +++ b/storage/rocksdb/rdb_sst_info.h @@ -128,6 +128,8 @@ class Rdb_sst_info { std::string m_prefix; static std::atomic m_prefix_counter; static std::string m_suffix; + bool m_committed; + mysql_mutex_t m_commit_mutex; #if defined(RDB_SST_INFO_USE_THREAD) std::queue m_queue; std::mutex m_mutex; @@ -137,6 +139,7 @@ class Rdb_sst_info { #endif Rdb_sst_file_ordered *m_sst_file; const bool m_tracing; + bool m_print_client_error; int open_new_sst_file(); void close_curr_sst_file(); @@ -157,7 +160,8 @@ class Rdb_sst_info { ~Rdb_sst_info(); int put(const rocksdb::Slice &key, const rocksdb::Slice &value); - int commit(); + int commit(bool print_client_error = true); + bool is_committed() const { return m_committed; } bool have_background_error() { return m_background_error != 0; } diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc index 7e66f22991d..718aec7345d 100644 --- a/storage/rocksdb/rdb_utils.cc +++ 
b/storage/rocksdb/rdb_utils.cc @@ -303,4 +303,30 @@ void rdb_log_status_error(const rocksdb::Status &s, const char *msg) { s.ToString().c_str()); } +bool rdb_check_rocksdb_corruption() { + return !my_access(myrocks::rdb_corruption_marker_file_name().c_str(), F_OK); +} + +void rdb_persist_corruption_marker() { + const std::string &fileName(myrocks::rdb_corruption_marker_file_name()); + int fd = my_open(fileName.c_str(), O_CREAT | O_SYNC, MYF(MY_WME)); + if (fd < 0) { + sql_print_error("RocksDB: Can't create file %s to mark rocksdb as " + "corrupted.", + fileName.c_str()); + } else { + sql_print_information("RocksDB: Creating the file %s to abort mysqld " + "restarts. Remove this file from the data directory " + "after fixing the corruption to recover. ", + fileName.c_str()); + } + + int ret = my_close(fd, MYF(MY_WME)); + if (ret) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Error (%d) closing the file %s", ret, + fileName.c_str()); + } +} + } // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h index 5b12e930144..8ec60a6813c 100644 --- a/storage/rocksdb/rdb_utils.h +++ b/storage/rocksdb/rdb_utils.h @@ -82,7 +82,7 @@ namespace myrocks { do { \ if (!(expr)) { \ my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ - abort_with_stack_traces(); \ + abort(); \ } \ } while (0) #endif // SHIP_ASSERT @@ -235,12 +235,20 @@ inline void rdb_check_mutex_call_result(const char *function_name, // This will hopefully result in a meaningful stack trace which we can use // to efficiently debug the root cause. 
- abort_with_stack_traces(); + abort(); } } void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr); +// return true if the marker file exists which indicates that the corruption +// has been detected +bool rdb_check_rocksdb_corruption(); + +// stores a marker file in the data directory so that after restart server +// is still aware that rocksdb data is corrupted +void rdb_persist_corruption_marker(); + /* Helper functions to parse strings. */ From 819adb44a8bce0c440e548391eb4e834cc60b4d8 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 29 Jan 2018 16:07:23 +0300 Subject: [PATCH 02/54] Post-merge fixes - Fix rocksdb.rocksdb test - Update rocksdb.autoinc_crash_safe_partition* tests --- .../rocksdb/r/autoinc_crash_safe.result | 25 +++++++-- .../r/autoinc_crash_safe_partition.result | 25 +++++++-- .../mysql-test/rocksdb/r/rocksdb.result | 53 +++++++++---------- .../rocksdb/t/autoinc_crash_safe.cnf | 2 +- .../rocksdb/t/autoinc_crash_safe.test | 2 +- .../t/autoinc_crash_safe_partition.cnf | 4 +- .../t/autoinc_crash_safe_partition.test | 2 +- .../rocksdb/mysql-test/rocksdb/t/rocksdb.test | 1 - 8 files changed, 75 insertions(+), 39 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result index ae29b4e415d..60395eced7e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result @@ -1,23 +1,32 @@ include/master-slave.inc -Warnings: -Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. -Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
[connection master] create table t (i int primary key auto_increment) engine=rocksdb; # # Testing concurrent transactions. # +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection con1; begin; insert into t values (); +connection con2; begin; insert into t values (); +connection con3; begin; insert into t values (); +connection con1; insert into t values (); +connection con2; insert into t values (); +connection con3; insert into t values (); +connection con2; commit; +connection con3; rollback; +connection con1; commit; delete from t; # Master value before restart @@ -25,25 +34,34 @@ select table_schema, table_name, auto_increment from information_schema.tables w table_schema table_name auto_increment test t 7 # Slave value before restart +connection slave; select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 +connection slave; include/stop_slave.inc include/rpl_restart_server.inc [server_number=1] +connection default; # Master value after restart select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 include/rpl_restart_server.inc [server_number=2] +connection slave; include/start_slave.inc # Slave value after restart select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 +disconnect con1; +disconnect con2; +disconnect con3; # # Testing interaction of merge markers with various DDL statements. # +connection slave; include/stop_slave.inc +connection default; # Drop and add primary key. alter table t modify i int; alter table t drop primary key; @@ -109,5 +127,6 @@ test t 16 # Drop table. 
drop table t; include/rpl_restart_server.inc [server_number=1] +connection slave; include/start_slave.inc include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result index 949928f5a9f..c837fb7c77d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result @@ -1,23 +1,32 @@ include/master-slave.inc -Warnings: -Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. -Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. [connection master] create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; # # Testing concurrent transactions. 
# +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection con1; begin; insert into t values (); +connection con2; begin; insert into t values (); +connection con3; begin; insert into t values (); +connection con1; insert into t values (); +connection con2; insert into t values (); +connection con3; insert into t values (); +connection con2; commit; +connection con3; rollback; +connection con1; commit; delete from t; # Master value before restart @@ -25,25 +34,34 @@ select table_schema, table_name, auto_increment from information_schema.tables w table_schema table_name auto_increment test t 7 # Slave value before restart +connection slave; select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 +connection slave; include/stop_slave.inc include/rpl_restart_server.inc [server_number=1] +connection default; # Master value after restart select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 include/rpl_restart_server.inc [server_number=2] +connection slave; include/start_slave.inc # Slave value after restart select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; table_schema table_name auto_increment test t 6 +disconnect con1; +disconnect con2; +disconnect con3; # # Testing interaction of merge markers with various DDL statements. # +connection slave; include/stop_slave.inc +connection default; # Drop and add primary key. alter table t modify i int; alter table t drop primary key; @@ -109,5 +127,6 @@ test t 16 # Drop table. 
drop table t; include/rpl_restart_server.inc [server_number=1] +connection slave; include/start_slave.inc include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index 1ad3ef620db..58a83977855 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -862,9 +862,10 @@ ERROR 42S02: Unknown table 'test.t45' # Now it fails if there is data overlap with what # already exists # -show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files'; +show variables where variable_name like 'rocksdb%' and +variable_name not like 'rocksdb_max_open_files' and variable_name not like 'rocksdb_supported_compression_types'; Variable_name Value rocksdb_access_hint_on_compaction_start 1 @@ -1461,7 +1462,7 @@ Rocksdb_rows_read # Rocksdb_rows_updated # Rocksdb_rows_deleted_blind # Rocksdb_rows_expired # -rocksdb_rows_filtered # +Rocksdb_rows_filtered # Rocksdb_system_rows_deleted # Rocksdb_system_rows_inserted # Rocksdb_system_rows_read # @@ -1472,31 +1473,31 @@ Rocksdb_queries_point # Rocksdb_queries_range # Rocksdb_covered_secondary_key_lookups # Rocksdb_block_cache_add # +Rocksdb_block_cache_add_failures # +Rocksdb_block_cache_bytes_read # +Rocksdb_block_cache_bytes_write # +Rocksdb_block_cache_data_add # +Rocksdb_block_cache_data_bytes_insert # Rocksdb_block_cache_data_hit # -rocksdb_block_cache_add_failures # -rocksdb_block_cache_bytes_read # -rocksdb_block_cache_bytes_write # -rocksdb_block_cache_data_add # -rocksdb_block_cache_data_bytes_insert # Rocksdb_block_cache_data_miss # +Rocksdb_block_cache_filter_add # +Rocksdb_block_cache_filter_bytes_evict # +Rocksdb_block_cache_filter_bytes_insert # Rocksdb_block_cache_filter_hit # Rocksdb_block_cache_filter_miss # Rocksdb_block_cache_hit # +Rocksdb_block_cache_index_add # +Rocksdb_block_cache_index_bytes_evict # 
+Rocksdb_block_cache_index_bytes_insert # Rocksdb_block_cache_index_hit # Rocksdb_block_cache_index_miss # Rocksdb_block_cache_miss # -rocksdb_block_cache_filter_add # -rocksdb_block_cache_filter_bytes_evict # -rocksdb_block_cache_filter_bytes_insert # Rocksdb_block_cachecompressed_hit # Rocksdb_block_cachecompressed_miss # Rocksdb_bloom_filter_prefix_checked # Rocksdb_bloom_filter_prefix_useful # Rocksdb_bloom_filter_useful # Rocksdb_bytes_read # -rocksdb_block_cache_index_add # -rocksdb_block_cache_index_bytes_evict # -rocksdb_block_cache_index_bytes_insert # Rocksdb_bytes_written # Rocksdb_compact_read_bytes # Rocksdb_compact_write_bytes # @@ -1504,7 +1505,11 @@ Rocksdb_compaction_key_drop_new # Rocksdb_compaction_key_drop_obsolete # Rocksdb_compaction_key_drop_user # Rocksdb_flush_write_bytes # +Rocksdb_get_hit_l0 # +Rocksdb_get_hit_l1 # +Rocksdb_get_hit_l2_and_up # Rocksdb_getupdatessince_calls # +Rocksdb_iter_bytes_read # Rocksdb_memtable_hit # Rocksdb_memtable_miss # Rocksdb_no_file_closes # @@ -1512,35 +1517,31 @@ Rocksdb_no_file_errors # Rocksdb_no_file_opens # Rocksdb_num_iterators # Rocksdb_number_block_not_compressed # +Rocksdb_number_db_next # +Rocksdb_number_db_next_found # +Rocksdb_number_db_prev # +Rocksdb_number_db_prev_found # +Rocksdb_number_db_seek # +Rocksdb_number_db_seek_found # Rocksdb_number_deletes_filtered # Rocksdb_number_keys_read # Rocksdb_number_keys_updated # Rocksdb_number_keys_written # -rocksdb_get_hit_l0 # -rocksdb_get_hit_l1 # -rocksdb_get_hit_l2_and_up # Rocksdb_number_merge_failures # Rocksdb_number_multiget_bytes_read # Rocksdb_number_multiget_get # Rocksdb_number_multiget_keys_read # Rocksdb_number_reseeks_iteration # Rocksdb_number_sst_entry_delete # -rocksdb_iter_bytes_read # Rocksdb_number_sst_entry_merge # Rocksdb_number_sst_entry_other # Rocksdb_number_sst_entry_put # Rocksdb_number_sst_entry_singledelete # -Rocksdb_number_stat_computes # Rocksdb_number_superversion_acquires # Rocksdb_number_superversion_cleanups # 
Rocksdb_number_superversion_releases # -rocksdb_number_db_next # -rocksdb_number_db_next_found # -rocksdb_number_db_prev # -rocksdb_number_db_prev_found # -rocksdb_number_db_seek # -rocksdb_number_db_seek_found # -Rocksdb_rate_limit_delay_millis # +Rocksdb_row_lock_deadlocks # +Rocksdb_row_lock_wait_timeouts # Rocksdb_snapshot_conflict_errors # Rocksdb_stall_l0_file_count_limit_slowdowns # Rocksdb_stall_locked_l0_file_count_limit_slowdowns # @@ -1560,8 +1561,6 @@ Rocksdb_write_other # Rocksdb_write_self # Rocksdb_write_timedout # Rocksdb_write_wal # -rocksdb_row_lock_deadlocks # -rocksdb_row_lock_wait_timeouts # select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%'; VARIABLE_NAME ROCKSDB_ROWS_DELETED diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf index f4257d80fdb..a43c4617b96 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf @@ -5,4 +5,4 @@ binlog_format=row [mysqld.2] binlog_format=row slave_parallel_workers=1 -rpl_skip_tx_api=ON +#rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test index b4f329dd1e7..e61ba720aaf 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test @@ -4,6 +4,6 @@ create table t (i int primary key auto_increment) engine=rocksdb; ---source suite/rocksdb/include/autoinc_crash_safe.inc +--source include/autoinc_crash_safe.inc --source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf index f4257d80fdb..0c0b614039e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf +++ 
b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf @@ -4,5 +4,5 @@ binlog_format=row [mysqld.2] binlog_format=row -slave_parallel_workers=1 -rpl_skip_tx_api=ON +#slave_parallel_workers=1 +#rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test index 5b6a761dd94..56cf93db9d9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test @@ -5,6 +5,6 @@ create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; ---source suite/rocksdb/include/autoinc_crash_safe.inc +--source include/autoinc_crash_safe.inc --source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test index 345e29e8df2..b884738424f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test @@ -788,7 +788,6 @@ drop table t45; # We exclude rocksdb_max_open_files here because it value is dependent on # the value of the servers open_file_limit and is expected to be different # across distros and installs -show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files'; --replace_regex /[a-f0-9]{40}/#/ show variables From 669e910a6a9522495574cd3cc8b3925e56b78ec8 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 30 Jan 2018 12:50:30 +0000 Subject: [PATCH 03/54] More post-merge fixes --- .../rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result | 3 +++ storage/rocksdb/mysql-test/rocksdb/r/cardinality.result | 1 + 2 files changed, 4 insertions(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result index 1b1cf524011..4e79d82810e 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result @@ -2,7 +2,10 @@ CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; SET rocksdb_bulk_load_allow_unsorted=1; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES (1); +connect con1,localhost,root,,; DROP TABLE t1; +connection default; +disconnect con1; SET rocksdb_bulk_load=0; SELECT * FROM t1; ERROR 42S02: Table 'test.t1' doesn't exist diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result index c59b9804ef3..4b201d523d9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result @@ -33,6 +33,7 @@ information_schema.statistics where table_name="t0" and column_name="a"; FLOOR(@N/cardinality) 2 drop table t0; +DROP TABLE IF EXISTS t1,t10,t11; create table t1( id bigint not null primary key, i1 bigint, #unique From ba03577a1f141eec2e63ea5df16c04168791fe6b Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 31 Jan 2018 14:31:44 +0000 Subject: [PATCH 04/54] Post-merge fixes: make rocksdb.bulk_load_unsorted_rev to work. 
--- .../rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc | 1 + .../rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result | 3 +++ 2 files changed, 4 insertions(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc index 84a9d8c578e..4a3158e814c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -1,3 +1,4 @@ +--source include/have_partition.inc --source include/count_sessions.inc SET rocksdb_bulk_load_size=3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result index fcd05fd60b4..f828fa57255 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result @@ -57,6 +57,7 @@ CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1") ENGINE=ROCKSDB; CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; +connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE @@ -65,6 +66,7 @@ start transaction with consistent snapshot; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; STAT_TYPE VALUE DB_NUM_SNAPSHOTS 1 +connection default; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE INTO TABLE t1; @@ -114,4 +116,5 @@ a b -4999998 5000000 -4999996 4999998 -4999994 4999996 +disconnect other; DROP TABLE t1, t2, t3; From f457a113ab64791514068560c1dfaf4df6f02683 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 1 Feb 2018 11:56:31 +0000 Subject: [PATCH 05/54] Post-merge fixes: update .result for rocksdb.i_s_deadlock --- 
.../mysql-test/rocksdb/r/i_s_deadlock.result | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result index 1c67387bcdc..36db92095e9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result @@ -6,58 +6,74 @@ set global rocksdb_lock_wait_timeout = 10000; # Clears deadlock buffer of any prior deadlocks. set global rocksdb_max_latest_deadlocks = 0; set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection default; show create table information_schema.rocksdb_deadlock; Table Create Table ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` ( - `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT '0', - `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT '0', + `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT 0, + `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT 0, `CF_NAME` varchar(193) NOT NULL DEFAULT '', `WAITING_KEY` varchar(513) NOT NULL DEFAULT '', `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '', `INDEX_NAME` varchar(193) NOT NULL DEFAULT '', `TABLE_NAME` varchar(193) NOT NULL DEFAULT '', - `ROLLED_BACK` bigint(8) NOT NULL DEFAULT '0' + `ROLLED_BACK` bigint(8) NOT NULL DEFAULT 0 ) ENGINE=MEMORY DEFAULT CHARSET=utf8 create table t (i int primary key) engine=rocksdb; insert into t values (1), (2), (3); select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK Deadlock #1 +connection con1; begin; select * from t where i=1 for update; i 1 +connection con2; begin; select * from t where i=2 for update; i 2 +connection con1; select * from t where i=2 for update; +connection con2; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try 
restarting transaction rollback; +connection con1; i 2 rollback; +connection default; select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 Deadlock #2 +connection con1; begin; select * from t where i=1 for update; i 1 +connection con2; begin; select * from t where i=2 for update; i 2 +connection con1; select * from t where i=2 for update; +connection con2; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction rollback; +connection con1; i 2 rollback; +connection default; select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 @@ -66,21 +82,27 @@ DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 set global rocksdb_max_latest_deadlocks = 10; Deadlock #3 +connection con1; begin; select * from t where i=1 for update; i 1 +connection con2; begin; select * from t where i=2 for update; i 2 +connection con1; select * from t where i=2 for update; +connection con2; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction rollback; +connection con1; i 2 rollback; +connection default; select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 @@ -94,52 +116,67 @@ select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK DEADLOCK_ID 
TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +connection con3; set rocksdb_deadlock_detect_depth = 2; Deadlock #4 +connection con1; begin; select * from t where i=1 for update; i 1 +connection con2; begin; select * from t where i=2 for update; i 2 +connection con3; begin; select * from t where i=3 for update; i 3 +connection con1; select * from t where i=2 for update; +connection con2; select * from t where i=3 for update; +connection con3; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction rollback; +connection con2; i 3 rollback; +connection con1; i 2 rollback; +connection default; set global rocksdb_max_latest_deadlocks = 5; select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK Deadlock #5 +connection con1; begin; select * from t where i=1 for update; i 1 +connection con2; begin; select * from t where i=2 for update; i 2 +connection con3; begin; select * from t where i=3 lock in share mode; i 3 +connection con1; select * from t where i=100 for update; i select * from t where i=101 for update; i select * from t where i=2 for update; +connection con2; select * from t where i=3 lock in share mode; i 3 @@ -150,14 +187,20 @@ i select * from t where i=1 lock in share mode; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction rollback; +connection con1; i 2 rollback; +connection con3; rollback; +connection default; select * from information_schema.rocksdb_deadlock; DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1 +disconnect con1; +disconnect con2; +disconnect con3; set global rocksdb_lock_wait_timeout = 
@prior_lock_wait_timeout; set global rocksdb_deadlock_detect = @prior_deadlock_detect; drop table t; From d0fd44fc524bb16d130038aa7d93897880b8f614 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 1 Feb 2018 19:44:48 +0000 Subject: [PATCH 06/54] Post-merge fixes: MariaDB-fication of rdb_i_s_deadlock_info --- .../rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result | 1 + storage/rocksdb/rdb_i_s.cc | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result index 021373be02a..8582b83c8de 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result @@ -81,3 +81,4 @@ ROCKSDB_DDL Beta ROCKSDB_INDEX_FILE_MAP Beta ROCKSDB_LOCKS Beta ROCKSDB_TRX Beta +ROCKSDB_DEADLOCK Beta diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index 3ecd998dec5..fd52ad4889a 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -1775,7 +1775,7 @@ struct st_maria_plugin rdb_i_s_trx_info = { MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL }; -struct st_mysql_plugin rdb_i_s_deadlock_info = { +struct st_maria_plugin rdb_i_s_deadlock_info = { MYSQL_INFORMATION_SCHEMA_PLUGIN, &rdb_i_s_info, "ROCKSDB_DEADLOCK", @@ -1788,6 +1788,6 @@ struct st_mysql_plugin rdb_i_s_deadlock_info = { nullptr, /* status variables */ nullptr, /* system variables */ nullptr, /* config options */ - 0, /* flags */ + MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL }; } // namespace myrocks From 48cfb3b80d814a58f8cc68c02195cef1d2a579bd Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 1 Feb 2018 23:37:41 +0000 Subject: [PATCH 07/54] Post-merge fixes: rocksdb.i_s_ddl --- storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result | 6 +++--- storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result index 526d6247e60..6bca2cbad2d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result @@ -11,12 +11,12 @@ CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB COMMENT "ttl_duration=3600;"; SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS -test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1 -test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0 -test is_ddl_t2 NULL x 2 13 default 0 0 test is_ddl_t1 NULL PRIMARY 1 13 default 0 0 test is_ddl_t1 NULL j 2 13 default 0 0 test is_ddl_t1 NULL k 2 13 kl_cf 0 0 +test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0 +test is_ddl_t2 NULL x 2 13 default 0 0 +test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1 DROP TABLE is_ddl_t1; DROP TABLE is_ddl_t2; DROP TABLE is_ddl_t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test index e1c04980c15..716f372067b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test @@ -17,10 +17,10 @@ CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, CREATE TABLE is_ddl_t2 (x INT, y INT, z INT, PRIMARY KEY (z, y) COMMENT 'zy_cf', KEY (x)) ENGINE = ROCKSDB; ---sorted_result CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB COMMENT "ttl_duration=3600;"; +--sorted_result SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; # cleanup From 34ff2967c5896797174598ac8d70bf941f2eb3f4 Mon Sep 17 00:00:00 2001 From: 
Varun Gupta Date: Thu, 8 Feb 2018 19:25:19 +0530 Subject: [PATCH 08/54] Post merge fix rocksdb_set_update_cf_options functions was freeing a pointer which it should not. --- storage/rocksdb/ha_rocksdb.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index aa1e08fcf50..5f19faca6cb 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -13141,7 +13141,6 @@ rocksdb_set_update_cf_options(THD *const /* unused */, } } - my_free(*reinterpret_cast(var_ptr)); // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to // free up resources used before. From e7efc793504c0cd9d8b6cf9198314ae9b3c58988 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 9 Apr 2018 01:10:43 +0300 Subject: [PATCH 09/54] Make rocksdb.information_schema test stable --- .../rocksdb/mysql-test/rocksdb/r/information_schema.result | 7 +++---- .../rocksdb/mysql-test/rocksdb/t/information_schema.test | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result index 6850d8dff16..aba14e3c076 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result @@ -4,15 +4,14 @@ DROP TABLE IF EXISTS t3; SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1; create table t1 (a int) engine=rocksdb; drop table t1; -select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; TYPE NAME VALUE MAX_INDEX_ID MAX_INDEX_ID max_index_id CF_FLAGS 0 default [0] CF_FLAGS 1 __system__ [0] -DDL_DROP_INDEX_ONGOING cf_id:0,index_id:max_index_id -select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; count(*) -4 +3 SET GLOBAL 
ROCKSDB_PAUSE_BACKGROUND_WORK=0; select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test index 2ffc186dd8f..67dae5d6263 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test @@ -19,8 +19,8 @@ drop table t1; --let $max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1) --replace_result $max_index_id max_index_id -select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; -select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; +select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0; select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; From 8628c589f69aea164ec1865f52eb0f5bc5154ed8 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 9 Apr 2018 15:27:35 +0300 Subject: [PATCH 10/54] Fix ha_rocksdb::calc_eq_cond_len() to handle HA_READ_PREFIX_LAST_OR_PREV correctly --- storage/rocksdb/ha_rocksdb.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 5f19faca6cb..9d3d07e72f0 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -7464,6 +7464,13 @@ int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, */ return slice.size() - bytes_changed_by_succ; } + if (find_flag == HA_READ_PREFIX_LAST_OR_PREV) { + /* + We are interested in records with or without the specified prefix, 
so + the only limit is that records are from this index + */ + return Rdb_key_def::INDEX_NUMBER_SIZE; + } if (end_key) { *end_key_packed_size = From 7e700bd2a81ae4b37145f1c32bb0902c72856d2d Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 9 Apr 2018 19:12:23 +0300 Subject: [PATCH 11/54] Fix ha_rocksdb::calc_eq_cond_len() to handle HA_READ_PREFIX_LAST_OR_PREV correctly This is Variant#2. - Undo Variant#1 - Instead, swap the range bounds if we are doing a reverse-ordered scan. --- sql/opt_range.cc | 4 ++-- storage/rocksdb/ha_rocksdb.cc | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index cc5746925a2..bc047b25f30 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -11677,8 +11677,8 @@ int QUICK_SELECT_DESC::get_next() end_key.flag= (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY); end_key.keypart_map= last_range->max_keypart_map; - result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &start_key, - (last_range->flag & NO_MAX_RANGE) ? NULL : &end_key); + result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &end_key, + (last_range->flag & NO_MAX_RANGE) ? 
NULL : &start_key); if (result) { DBUG_RETURN(result); diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 9d3d07e72f0..5f19faca6cb 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -7464,13 +7464,6 @@ int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, */ return slice.size() - bytes_changed_by_succ; } - if (find_flag == HA_READ_PREFIX_LAST_OR_PREV) { - /* - We are interested in records with or without the specified prefix, so - the only limit is that records are from this index - */ - return Rdb_key_def::INDEX_NUMBER_SIZE; - } if (end_key) { *end_key_packed_size = From 5715177a76e61a7fbf2be920786055704544592f Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 11 Apr 2018 18:39:06 +0300 Subject: [PATCH 12/54] - Fix a merge error - Test result updates --- storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result | 1 + storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 9a7c2560819..2adaba1e228 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -116,4 +116,5 @@ a b 4999999 -4999997 4999997 -4999995 4999995 -4999993 +disconnect other; DROP TABLE t1, t2, t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test index f4d850d78b4..2abeae343c9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test @@ -4,4 +4,3 @@ --let pk_cf=cf1 --source ../include/bulk_load_unsorted.inc -binmode $fh; From 34e6b5fa6ff34c19278f3dd13c625123082cfb7f Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 12 Apr 2018 20:28:54 +0300 Subject: [PATCH 
13/54] Undo fix for MDEV-13852: RocksDB now has a proper WinWriteableFile::IsSyncThreadSafe() Commit being reverted is: commit 689168be1240cf61d0ea17529e79660e98d15b46 Author: Vladislav Vaintroub Date: Thu Nov 16 18:57:18 2017 +0000 MDEV-13852 - redefine WinWriteableFile such as IsSyncThreadSafe() is set to true, as it should. Copy and modify original io_win.h header file to a different location (as we cannot patch anything in submodule). Make sure modified header is used. --- storage/rocksdb/build_rocksdb.cmake | 5 - storage/rocksdb/patch/port/win/io_win.h | 446 ------------------------ 2 files changed, 451 deletions(-) delete mode 100644 storage/rocksdb/patch/port/win/io_win.h diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index b9b70593166..c76f711463e 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -12,11 +12,6 @@ INCLUDE_DIRECTORIES( ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src ) -IF(WIN32) - INCLUDE_DIRECTORIES(BEFORE - ${CMAKE_CURRENT_SOURCE_DIR}/patch) -ENDIF() - list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") if(WIN32) diff --git a/storage/rocksdb/patch/port/win/io_win.h b/storage/rocksdb/patch/port/win/io_win.h deleted file mode 100644 index f5ff253bbaa..00000000000 --- a/storage/rocksdb/patch/port/win/io_win.h +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#pragma once - -#include -#include -#include - -#include "rocksdb/status.h" -#include "rocksdb/env.h" -#include "util/aligned_buffer.h" - -#include - - -namespace rocksdb { -namespace port { - -std::string GetWindowsErrSz(DWORD err); - -inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) { - return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL)) - ? Status::NoSpace(context, GetWindowsErrSz(err)) - : Status::IOError(context, GetWindowsErrSz(err)); -} - -inline Status IOErrorFromLastWindowsError(const std::string& context) { - return IOErrorFromWindowsError(context, GetLastError()); -} - -inline Status IOError(const std::string& context, int err_number) { - return (err_number == ENOSPC) - ? Status::NoSpace(context, strerror(err_number)) - : Status::IOError(context, strerror(err_number)); -} - -// Note the below two do not set errno because they are used only here in this -// file -// on a Windows handle and, therefore, not necessary. Translating GetLastError() -// to errno -// is a sad business -inline int fsync(HANDLE hFile) { - if (!FlushFileBuffers(hFile)) { - return -1; - } - - return 0; -} - -SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, uint64_t offset); - -SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset); - -Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size); - -Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize); - -size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size); - -class WinFileData { - protected: - const std::string filename_; - HANDLE hFile_; - // If ture, the I/O issued would be direct I/O which the buffer - // will need to be aligned (not sure there is a guarantee that the buffer - // passed in is aligned). 
- const bool use_direct_io_; - - public: - // We want this class be usable both for inheritance (prive - // or protected) and for containment so __ctor and __dtor public - WinFileData(const std::string& filename, HANDLE hFile, bool direct_io) - : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {} - - virtual ~WinFileData() { this->CloseFile(); } - - bool CloseFile() { - bool result = true; - - if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) { - result = ::CloseHandle(hFile_); - assert(result); - hFile_ = NULL; - } - return result; - } - - const std::string& GetName() const { return filename_; } - - HANDLE GetFileHandle() const { return hFile_; } - - bool use_direct_io() const { return use_direct_io_; } - - WinFileData(const WinFileData&) = delete; - WinFileData& operator=(const WinFileData&) = delete; -}; - -class WinSequentialFile : protected WinFileData, public SequentialFile { - - // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; - -public: - WinSequentialFile(const std::string& fname, HANDLE f, - const EnvOptions& options); - - ~WinSequentialFile(); - - WinSequentialFile(const WinSequentialFile&) = delete; - WinSequentialFile& operator=(const WinSequentialFile&) = delete; - - virtual Status Read(size_t n, Slice* result, char* scratch) override; - virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, - char* scratch) override; - - virtual Status Skip(uint64_t n) override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } -}; - -// mmap() based random-access -class WinMmapReadableFile : private WinFileData, public RandomAccessFile { - HANDLE hMap_; - - const void* mapped_region_; - const size_t length_; - - public: - // mapped_region_[0,length-1] contains the mmapped contents of the file. 
- WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, - const void* mapped_region, size_t length); - - ~WinMmapReadableFile(); - - WinMmapReadableFile(const WinMmapReadableFile&) = delete; - WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete; - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -// We preallocate and use memcpy to append new -// data to the file. This is safe since we either properly close the -// file before reading from it, or for log files, the reading code -// knows enough to skip zero suffixes. -class WinMmapFile : private WinFileData, public WritableFile { - private: - HANDLE hMap_; - - const size_t page_size_; // We flush the mapping view in page_size - // increments. We may decide if this is a memory - // page size or SSD page size - const size_t - allocation_granularity_; // View must start at such a granularity - - size_t reserved_size_; // Preallocated size - - size_t mapping_size_; // The max size of the mapping object - // we want to guess the final file size to minimize the remapping - size_t view_size_; // How much memory to map into a view at a time - - char* mapped_begin_; // Must begin at the file offset that is aligned with - // allocation_granularity_ - char* mapped_end_; - char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_]) - char* last_sync_; // Where have we synced up to - - uint64_t file_offset_; // Offset of mapped_begin_ in file - - // Do we have unsynced writes? 
- bool pending_sync_; - - // Can only truncate or reserve to a sector size aligned if - // used on files that are opened with Unbuffered I/O - Status TruncateFile(uint64_t toSize); - - Status UnmapCurrentRegion(); - - Status MapNewRegion(); - - virtual Status PreallocateInternal(uint64_t spaceToReserve); - - public: - WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, - size_t allocation_granularity, const EnvOptions& options); - - ~WinMmapFile(); - - WinMmapFile(const WinMmapFile&) = delete; - WinMmapFile& operator=(const WinMmapFile&) = delete; - - virtual Status Append(const Slice& data) override; - - // Means Close() will properly take care of truncate - // and it does not need any additional information - virtual Status Truncate(uint64_t size) override; - - virtual Status Close() override; - - virtual Status Flush() override; - - // Flush only data - virtual Status Sync() override; - - /** - * Flush data as well as metadata to stable storage. - */ - virtual Status Fsync() override; - - /** - * Get the size of valid data in the file. This will not match the - * size that is returned from the filesystem because we use mmap - * to extend file by map_size every time. 
- */ - virtual uint64_t GetFileSize() override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual Status Allocate(uint64_t offset, uint64_t len) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -class WinRandomAccessImpl { - protected: - WinFileData* file_base_; - size_t alignment_; - - // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; - - WinRandomAccessImpl(WinFileData* file_base, size_t alignment, - const EnvOptions& options); - - virtual ~WinRandomAccessImpl() {} - - Status ReadImpl(uint64_t offset, size_t n, Slice* result, - char* scratch) const; - - size_t GetAlignment() const { return alignment_; } - - public: - - WinRandomAccessImpl(const WinRandomAccessImpl&) = delete; - WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete; -}; - -// pread() based random-access -class WinRandomAccessFile - : private WinFileData, - protected WinRandomAccessImpl, // Want to be able to override - // PositionedReadInternal - public RandomAccessFile { - public: - WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, - const EnvOptions& options); - - ~WinRandomAccessFile(); - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; - - virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual size_t GetRequiredBufferAlignment() const override; -}; - -// This is a sequential write class. It has been mimicked (as others) after -// the original Posix class. We add support for unbuffered I/O on windows as -// well -// we utilize the original buffer as an alignment buffer to write directly to -// file with no buffering. 
-// No buffering requires that the provided buffer is aligned to the physical -// sector size (SSD page size) and -// that all SetFilePointer() operations to occur with such an alignment. -// We thus always write in sector/page size increments to the drive and leave -// the tail for the next write OR for Close() at which point we pad with zeros. -// No padding is required for -// buffered access. -class WinWritableImpl { - protected: - WinFileData* file_data_; - const uint64_t alignment_; - uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND - uint64_t reservedsize_; // how far we have reserved space - - virtual Status PreallocateInternal(uint64_t spaceToReserve); - - WinWritableImpl(WinFileData* file_data, size_t alignment); - - ~WinWritableImpl() {} - - - uint64_t GetAlignement() const { return alignment_; } - - Status AppendImpl(const Slice& data); - - // Requires that the data is aligned as specified by - // GetRequiredBufferAlignment() - Status PositionedAppendImpl(const Slice& data, uint64_t offset); - - Status TruncateImpl(uint64_t size); - - Status CloseImpl(); - - Status SyncImpl(); - - uint64_t GetFileNextWriteOffset() { - // Double accounting now here with WritableFileWriter - // and this size will be wrong when unbuffered access is used - // but tests implement their own writable files and do not use - // WritableFileWrapper - // so we need to squeeze a square peg through - // a round hole here. 
- return next_write_offset_; - } - - Status AllocateImpl(uint64_t offset, uint64_t len); - - public: - WinWritableImpl(const WinWritableImpl&) = delete; - WinWritableImpl& operator=(const WinWritableImpl&) = delete; -}; - -class WinWritableFile : private WinFileData, - protected WinWritableImpl, - public WritableFile { - public: - WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, - size_t capacity, const EnvOptions& options); - - ~WinWritableFile(); - - bool IsSyncThreadSafe() const override { - return true; - } - - virtual Status Append(const Slice& data) override; - - // Requires that the data is aligned as specified by - // GetRequiredBufferAlignment() - virtual Status PositionedAppend(const Slice& data, uint64_t offset) override; - - // Need to implement this so the file is truncated correctly - // when buffered and unbuffered mode - virtual Status Truncate(uint64_t size) override; - - virtual Status Close() override; - - // write out the cached data to the OS cache - // This is now taken care of the WritableFileWriter - virtual Status Flush() override; - - virtual Status Sync() override; - - virtual Status Fsync() override; - - // Indicates if the class makes use of direct I/O - // Use PositionedAppend - virtual bool use_direct_io() const override; - - virtual size_t GetRequiredBufferAlignment() const override; - - virtual uint64_t GetFileSize() override; - - virtual Status Allocate(uint64_t offset, uint64_t len) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -class WinRandomRWFile : private WinFileData, - protected WinRandomAccessImpl, - protected WinWritableImpl, - public RandomRWFile { - public: - WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, - const EnvOptions& options); - - ~WinRandomRWFile() {} - - // Indicates if the class makes use of direct I/O - // If false you must pass aligned buffer to Write() - virtual bool use_direct_io() const override; - - // Use the 
returned alignment value to allocate aligned - // buffer for Write() when use_direct_io() returns true - virtual size_t GetRequiredBufferAlignment() const override; - - // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. - // Pass aligned buffer when use_direct_io() returns true. - virtual Status Write(uint64_t offset, const Slice& data) override; - - // Read up to `n` bytes starting from offset `offset` and store them in - // result, provided `scratch` size should be at least `n`. - // Returns Status::OK() on success. - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual Status Flush() override; - - virtual Status Sync() override; - - virtual Status Fsync() { return Sync(); } - - virtual Status Close() override; -}; - -class WinDirectory : public Directory { - public: - WinDirectory() {} - - virtual Status Fsync() override; -}; - -class WinFileLock : public FileLock { - public: - explicit WinFileLock(HANDLE hFile) : hFile_(hFile) { - assert(hFile != NULL); - assert(hFile != INVALID_HANDLE_VALUE); - } - - ~WinFileLock(); - - private: - HANDLE hFile_; -}; -} -} From 5545753b0b21b98fbdd2d8b894bc141d1ed558b3 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Fri, 13 Apr 2018 00:39:51 +0300 Subject: [PATCH 14/54] Update test results In the upstream include/search_pattern_in_file.inc prints nothing when it has found the searched string (if it hasn't, it produces an error) In MariaDB, it prints "FOUND n ..." 
--- storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result index 5a1eeb9fa3f..5d34f4e9640 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result @@ -1,4 +1,5 @@ CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*"); +FOUND 1 /RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit/ in rocksdb.max_open_files.err SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files 1 From 78e42153b51d6166b916a5bdb39e8726913c263e Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Fri, 13 Apr 2018 20:26:40 +0300 Subject: [PATCH 15/54] Fix compile on windows: O_SYNC is not available, use a my_sync() call instead. 
--- storage/rocksdb/rdb_utils.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc index 19469d041b3..723e079a165 100644 --- a/storage/rocksdb/rdb_utils.cc +++ b/storage/rocksdb/rdb_utils.cc @@ -358,7 +358,8 @@ bool rdb_check_rocksdb_corruption() { void rdb_persist_corruption_marker() { const std::string &fileName(myrocks::rdb_corruption_marker_file_name()); - int fd = my_open(fileName.c_str(), O_CREAT | O_SYNC, MYF(MY_WME)); + /* O_SYNC is not supported on windows */ + int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME)); if (fd < 0) { sql_print_error("RocksDB: Can't create file %s to mark rocksdb as " "corrupted.", @@ -370,6 +371,12 @@ void rdb_persist_corruption_marker() { fileName.c_str()); } +#ifdef _WIN32 + /* A replacement for O_SYNC flag above */ + if (fd >= 0) + my_sync(fd, MYF(0)); +#endif + int ret = my_close(fd, MYF(MY_WME)); if (ret) { // NO_LINT_DEBUG From 8496e84f05e69a31e93653468b6e0579e154e8d8 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sun, 15 Apr 2018 12:52:27 +0300 Subject: [PATCH 16/54] Trivial post-merge fixes --- .../mysql-test/rocksdb/r/autoinc_debug.result | 12 ++++++------ .../rocksdb/r/skip_validate_tmp_table.result | 2 +- .../rocksdb/r/ttl_primary_read_filtering.result | 2 +- .../rocksdb/mysql-test/rocksdb/t/autoinc_debug.test | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result index 79030e35225..fe08cd7c361 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result @@ -2,7 +2,7 @@ # Testing upgrading from server without merges for auto_increment # to new server with such support. 
# -set debug='+d,myrocks_autoinc_upgrade'; +set debug_dbug='+d,myrocks_autoinc_upgrade'; create table t (i int primary key auto_increment); insert into t values (); insert into t values (); @@ -20,7 +20,7 @@ select table_name, index_name, auto_increment from information_schema.rocksdb_ddl where table_name = 't'; table_name index_name auto_increment t PRIMARY NULL -set debug='-d,myrocks_autoinc_upgrade'; +set debug_dbug='-d,myrocks_autoinc_upgrade'; insert into t values (); insert into t values (); insert into t values (); @@ -56,7 +56,7 @@ insert into t values (); begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_before"; +set debug_dbug="+d,crash_commit_before"; commit; ERROR HY000: Lost connection to MySQL server during query select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; @@ -69,7 +69,7 @@ max(i) begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after_prepare"; +set debug_dbug="+d,crash_commit_after_prepare"; commit; ERROR HY000: Lost connection to MySQL server during query select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; @@ -82,7 +82,7 @@ max(i) begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after_log"; +set debug_dbug="+d,crash_commit_after_log"; commit; ERROR HY000: Lost connection to MySQL server during query select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; @@ -95,7 +95,7 @@ max(i) begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after"; +set debug_dbug="+d,crash_commit_after"; commit; ERROR HY000: Lost connection to MySQL server during query select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result 
b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result index df048748e05..d324793b89d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result @@ -1,9 +1,9 @@ create table t1 (pk int primary key) engine=rocksdb; show tables; -set session debug_dbug="+d,gen_sql_table_name"; Tables_in_test #mysql50#t1#sql-test t1 +set session debug_dbug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug_dbug= "-d,gen_sql_table_name"; show tables; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result index 5bb3229de17..c66b17926b7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result @@ -210,8 +210,8 @@ select variable_value-@a from information_schema.global_status where variable_na variable_value-@a 1 # Switching to connection 1 -select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; connection con1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 1 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test index df67338a0f1..abcae8d98a5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test @@ -7,7 +7,7 @@ --echo # to new server with such support. 
--echo # -set debug='+d,myrocks_autoinc_upgrade'; +set debug_dbug='+d,myrocks_autoinc_upgrade'; create table t (i int primary key auto_increment); insert into t values (); insert into t values (); @@ -20,7 +20,7 @@ select * from t; select table_name, index_name, auto_increment from information_schema.rocksdb_ddl where table_name = 't'; -set debug='-d,myrocks_autoinc_upgrade'; +set debug_dbug='-d,myrocks_autoinc_upgrade'; --source include/restart_mysqld.inc @@ -55,7 +55,7 @@ insert into t values (); begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_before"; +set debug_dbug="+d,crash_commit_before"; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 commit; @@ -71,7 +71,7 @@ select max(i) from t; begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after_prepare"; +set debug_dbug="+d,crash_commit_after_prepare"; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 commit; @@ -87,7 +87,7 @@ select max(i) from t; begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after_log"; +set debug_dbug="+d,crash_commit_after_log"; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 commit; @@ -103,7 +103,7 @@ select max(i) from t; begin; insert into t values (); insert into t values (); -set debug="+d,crash_commit_after"; +set debug_dbug="+d,crash_commit_after"; --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 commit; From 261d4755f4418d3a1b4dadf8d1ed5098ae3aa92f Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sun, 15 Apr 2018 14:34:56 +0300 Subject: [PATCH 17/54] Post-merge fixes: rocksdb.check_ignore_unknown_options --- .../rocksdb/r/check_ignore_unknown_options.result | 1 + .../mysql-test/rocksdb/t/check_ignore_unknown_options.test | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result 
b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result index 24c1b730325..6ff49908a51 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result @@ -1,6 +1,7 @@ select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; variable_name variable_value ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON +FOUND 1 /RocksDB: Compatibility check against existing database options failed/ in my_restart.err select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; variable_name variable_value ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test index 25213544bb5..b4866de4d3b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test @@ -4,12 +4,13 @@ let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err; select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; ---exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" +--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --shutdown_server 10 + --error 1 ---exec $MYSQLD_CMD --rocksdb_ignore_unknown_options=0 --loose-console > $error_log 2>&1 +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --loose-console --log-error=$error_log let SEARCH_FILE= $error_log; let SEARCH_PATTERN= RocksDB: Compatibility 
check against existing database options failed; @@ -17,5 +18,5 @@ let SEARCH_PATTERN= RocksDB: Compatibility check against existing database optio --enable_reconnect --exec echo "restart" > $restart_file --source include/wait_until_connected_again.inc ---exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}" +--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}" select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; From 0fad97a9ecbcb6a68ae396ea6f7a1dd4d933279c Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sun, 15 Apr 2018 15:34:06 +0300 Subject: [PATCH 18/54] Post-merge fixes: add a suppression for rocksdb.skip_validate_tmP_table --- .../mysql-test/rocksdb/r/skip_validate_tmp_table.result | 1 + .../rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test | 3 +++ 2 files changed, 4 insertions(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result index d324793b89d..92906f22b1e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result @@ -3,6 +3,7 @@ show tables; Tables_in_test #mysql50#t1#sql-test t1 +call mtr.add_suppression('Invalid .old.. 
table or database name .t1#sql-test.'); set session debug_dbug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug_dbug= "-d,gen_sql_table_name"; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test index 80c366d26b0..c4321462dfd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test @@ -12,6 +12,9 @@ create table t1 (pk int primary key) engine=rocksdb; show tables; +# MariaDB produces a warning: +call mtr.add_suppression('Invalid .old.. table or database name .t1#sql-test.'); + # This will append '#sql-test' to the end of new name set session debug_dbug="+d,gen_sql_table_name"; rename table t1 to t2; From ce4149fc9b5eda464919cefb6ea22d02879a6882 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sun, 15 Apr 2018 21:15:21 +0300 Subject: [PATCH 19/54] Post-merge fixes: make rocksdb.use_direct_reads_writes pass --- .../rocksdb/r/use_direct_reads_writes.result | 41 ++++++------------- .../rocksdb/t/use_direct_reads_writes.test | 1 - 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result index f601f75d188..e8456457cdd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result @@ -1,35 +1,18 @@ -Checking direct reads -Checking direct writes -Checking rocksdb_flush_log_at_trx_commit -Validate flush_log settings when direct writes is enabled -set global rocksdb_flush_log_at_trx_commit=0; -set global rocksdb_flush_log_at_trx_commit=1; -ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1' -set global rocksdb_flush_log_at_trx_commit=2; -ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't 
be set to the value of '2' call mtr.add_suppression("rocksdb"); call mtr.add_suppression("Aborting"); # This shows that RocksDB plugin is loaded: select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; plugin_name plugin_type ROCKSDB STORAGE ENGINE -# Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -# Check that MyRocks has printed an error message into server error log: -FOUND 1 /enable both use_direct_reads/ in mysqld.1.err -# Now, restart the server back with regular settings -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -ROCKSDB STORAGE ENGINE -# -# Now, repeat the same with another set of invalid arguments -# -# Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in mysqld.1.err -# Now, restart the server back with regular settings -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -ROCKSDB STORAGE ENGINE +Checking direct reads +FOUND 1 /enable both use_direct_reads/ in use_direct_reads_writes.err +Checking direct writes +FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in use_direct_reads_writes.err +Checking rocksdb_flush_log_at_trx_commit +FOUND 1 /rocksdb_flush_log_at_trx_commit needs to be/ in use_direct_reads_writes.err +Validate flush_log settings when direct writes is enabled +set global rocksdb_flush_log_at_trx_commit=0; +set global rocksdb_flush_log_at_trx_commit=1; +ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1' +set global rocksdb_flush_log_at_trx_commit=2; +ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of 
'2' diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test index 550cbd2753b..8dfbe312ea8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test @@ -34,7 +34,6 @@ select plugin_name, plugin_type from information_schema.plugins where plugin_nam --source include/search_pattern_in_file.inc --remove_file $LOG -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; # Verify invalid direct-writes and --rocksdb_flush_log_at_trx_commit combination at startup fails --echo Checking rocksdb_flush_log_at_trx_commit From 40bed2d39e0409d3a1ff468f792ec00ae0c8e7df Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 16 Apr 2018 11:06:43 +0300 Subject: [PATCH 20/54] MyRocks: Add files lost during the merge Include files that upstream has in mysql-test/include and so they were not picked up by the merge process --- .../include/restart_mysqld_with_invalid_option.inc | 8 ++++++++ .../rocksdb/include/start_mysqld_with_option.inc | 14 ++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc new file mode 100644 index 00000000000..8eef7ed2162 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc @@ -0,0 +1,8 @@ +--source include/shutdown_mysqld.inc + +# Expect the server to fail to come up with these options +--error 1 +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option + +# Restart the server with the default options +--source 
include/start_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc new file mode 100644 index 00000000000..73e30b3e46c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc @@ -0,0 +1,14 @@ +# Include this script only after using shutdown_mysqld.inc +# where $_expect_file_name was initialized. +# Write file to make mysql-test-run.pl start up the server again +--exec echo "restart:$_mysqld_option" > $_expect_file_name + +# Turn on reconnect +--enable_reconnect + +# Call script that will poll the server waiting for it to be back online again +--source include/wait_until_connected_again.inc + +# Turn off reconnect again +--disable_reconnect + From 959362c0208c545af8047fa690a439c39290e138 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 16 Apr 2018 21:09:14 +0300 Subject: [PATCH 21/54] Fix compilation on windows --- sql/mysqld.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/mysqld.h b/sql/mysqld.h index 912fe3696ae..a2a97d0eee5 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -209,7 +209,7 @@ extern int max_user_connections; extern volatile ulong cached_thread_count; extern ulong what_to_log,flush_time; extern ulong max_prepared_stmt_count, prepared_stmt_count; -extern ulong open_files_limit; +extern MYSQL_PLUGIN_IMPORT ulong open_files_limit; extern ulonglong binlog_cache_size, binlog_stmt_cache_size; extern ulonglong max_binlog_cache_size, max_binlog_stmt_cache_size; extern ulong max_binlog_size; From a7e5049fec3d938dd19df82c3db047a2a0d9119f Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 17 Apr 2018 11:10:11 +0300 Subject: [PATCH 22/54] MyRocks: Fix link error on Windows rocksdb_aux_lib.lib(rdb_sst_info.obj) : error LNK2001: unresolved external symbol PSI_server [D:\winx64-packages\build\storage\rocksdb\rocksdb.vcxproj] rocksdb_aux_lib is an utility library used by 
standalone tools. These won't have PSI_server. FIxed by moving rdb_sst_info.* out of the library (as they do not seem to be used by these standalone tools) --- storage/rocksdb/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index d5e9b1914f0..953854d0e9f 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -95,6 +95,8 @@ SET(ROCKSDB_SE_SOURCES rdb_threads.h rdb_psi.h rdb_psi.cc + rdb_sst_info.cc + rdb_sst_info.h ) # MariaDB: the following is added in build_rocksdb.cmake, when appropriate: @@ -137,8 +139,6 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib event_listener.h rdb_perf_context.cc rdb_perf_context.h - rdb_sst_info.cc - rdb_sst_info.h rdb_buff.h rdb_mariadb_port.h ) From 6bea5e9e0f37499a9ff5f54e2e8f7665ec1ac317 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 17 Apr 2018 15:04:15 +0300 Subject: [PATCH 23/54] MyRocks: Post-merge fixes in bulk_load_error.test --- .../rocksdb/r/bulk_load_errors.result | 22 +++++++++++-------- .../rocksdb/t/bulk_load_errors.test | 9 +++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result index 86a88c30d89..3703c208d0b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result @@ -14,27 +14,30 @@ INSERT INTO t1 VALUES(1); INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); -# -# In MyRocks, the following statement will intentionally crash the server. 
-# In MariaDB, it will cause an error SET rocksdb_bulk_load=0; ERROR HY000: Rows inserted during bulk load must not overlap existing rows -# -# Despite the error, bulk load operation is over so the variable value -# will be 0: -select @@rocksdb_bulk_load; -@@rocksdb_bulk_load -0 +SHOW VARIABLES LIKE 'rocksdb_bulk_load'; +Variable_name Value +rocksdb_bulk_load OFF call mtr.add_suppression('finalizing last SST file while setting bulk loading variable'); +SELECT * FROM t1; +pk +10 +11 +FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable/ in rocksdb.bulk_load_errors.1.err +connect con1,localhost,root,,; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES(1); INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); +connection default; +disconnect con1; SELECT * FROM t1; pk 10 11 +FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while disconnecting/ in rocksdb.bulk_load_errors.2.err TRUNCATE TABLE t1; SET rocksdb_bulk_load_allow_unsorted=1; SET rocksdb_bulk_load=1; @@ -82,6 +85,7 @@ Warning 1292 Truncated incorrect table_open_cache value: '0' INSERT INTO t1 VALUES(51479+0.333333333,1); DROP TABLE t1; SET @@global.table_open_cache=@orig_table_open_cache; +FOUND 1 /RocksDB: Error [0-9]+ finalizing bulk load while closing handler/ in rocksdb.bulk_load_errors.3.err CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test index 25b98c5ca17..1e349d0ff18 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test @@ -33,13 +33,10 @@ SET rocksdb_bulk_load=0; SHOW VARIABLES LIKE 'rocksdb_bulk_load'; call mtr.add_suppression('finalizing last SST file while setting bulk loading variable'); ---echo # ---echo # Despite the 
error, bulk load operation is over so the variable value ---echo # will be 0: SELECT * FROM t1; --let SEARCH_FILE=$LOG1 ---let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while setting bulk loading variable +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable --source include/search_pattern_in_file.inc --let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err @@ -64,7 +61,7 @@ SELECT * FROM t1; --source include/wait_until_count_sessions.inc --let SEARCH_FILE=$LOG2 ---let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while disconnecting +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while disconnecting --source include/search_pattern_in_file.inc --let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err @@ -121,7 +118,7 @@ DROP TABLE t1; SET @@global.table_open_cache=@orig_table_open_cache; --let SEARCH_FILE=$LOG3 ---let SEARCH_PATTERN=RocksDB: Error 198 finalizing bulk load while closing handler +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing bulk load while closing handler --source include/search_pattern_in_file.inc --source include/restart_mysqld.inc From 955233256ee26486dc55ce50e759e05ad88546fc Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 19 Apr 2018 15:41:13 +0300 Subject: [PATCH 24/54] MyRocks: fix rocksdb.information_schema testcase. "The Store binlog position inside RocksDB" feature is only needed for obtaining binlog position after having restored a MyRocks backup. This is not yet supported in MariaDB, so properly disable it in both places where it is done. 
--- storage/rocksdb/ha_rocksdb.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 5f19faca6cb..6e9e7ef4832 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -2079,10 +2079,16 @@ protected: rollback(); return true; } else { +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. + */ mysql_bin_log_commit_pos(m_thd, &m_mysql_log_offset, &m_mysql_log_file_name); binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset, get_write_batch()); +#endif return commit_no_binlog(); } } @@ -3089,7 +3095,11 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) /* We were instructed to prepare the whole transaction, or this is an SQL statement end and autocommit is on */ -#ifdef MARIAROCKS_NOT_YET // Crash-safe slave does not work yet +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. 
+ */ std::vector slave_gtid_info; my_core::thd_slave_gtid_info(thd, &slave_gtid_info); for (const auto &it : slave_gtid_info) { From 39fbafbcc22cd51c1cbca8a06320394e94a9cd50 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 19 Apr 2018 16:28:05 +0300 Subject: [PATCH 25/54] Post-merge fixes: make rocksdb.allow_to_start_after_corruption pass --- .../rocksdb/r/allow_to_start_after_corruption.result | 7 +++++-- .../rocksdb/t/allow_to_start_after_corruption.test | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result index 1a2abbf3285..9b5a335b6f8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result @@ -12,9 +12,10 @@ insert into t1 values (1,1),(2,2),(3,3); select * from t1 where pk=1; pk col1 1 1 -set session debug= "+d,rocksdb_return_status_corrupted"; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; select * from t1 where pk=1; ERROR HY000: Lost connection to MySQL server during query +FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err # # The same for scan queries # @@ -23,12 +24,14 @@ pk col1 1 1 2 2 3 3 -set session debug= "+d,rocksdb_return_status_corrupted"; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; select * from t1; ERROR HY000: Lost connection to MySQL server during query +FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err # # Test restart failure. The server is shutdown at this point. 
# +FOUND 1 /The server will exit normally and stop restart attempts/ in allow_to_start_after_corruption_debug.err # # Remove corruption file and restart cleanly # diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test index 1863c3247b8..67b2d5f96d7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test @@ -26,7 +26,7 @@ create table t1 ( insert into t1 values (1,1),(2,2),(3,3); select * from t1 where pk=1; -set session debug= "+d,rocksdb_return_status_corrupted"; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 select * from t1 where pk=1; @@ -41,7 +41,7 @@ select * from t1 where pk=1; --source include/start_mysqld_with_option.inc select * from t1; -set session debug= "+d,rocksdb_return_status_corrupted"; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; --exec echo "wait" > $_expect_file_name --error 2013 select * from t1; @@ -57,7 +57,7 @@ select * from t1; # remove flag to ignore corruption --let $_mysqld_option=--log-error=$LOG --error 0 ---exec $MYSQLD_CMD $_mysqld_option +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option --let SEARCH_PATTERN=The server will exit normally and stop restart attempts --source include/search_pattern_in_file.inc --remove_file $LOG @@ -66,7 +66,7 @@ select * from t1; --echo # Remove corruption file and restart cleanly --echo # ---exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/.rocksdb/ROCKSDB_CORRUPTED +--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/#rocksdb/ROCKSDB_CORRUPTED --source include/start_mysqld_with_option.inc drop table t1; From e3661b9f7c60aa471aaa79e597723e897caf320c Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 7 May 2018 20:21:35 +0300 Subject: [PATCH 26/54] 
Cherry-picked from MyRocks upstream: Issue #809: Wrong query result with bloom filters In reverse-ordered column families, if one wants to start reading at the logical end of the index, they should Seek() to a key value that is not covered by the index. This may (and typically does) prevent use of a bloom filter. The calls to setup_scan_iterator() that are made for index and table scan didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE. Fixed them to compute and pass correct eq_cond_len. Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds. --- storage/rocksdb/ha_rocksdb.cc | 9 ++- .../mysql-test/rocksdb/r/bloomfilter5.result | 62 +++++++++++++++++++ .../rocksdb/t/bloomfilter5-master.opt | 1 + .../mysql-test/rocksdb/t/bloomfilter5.test | 61 ++++++++++++++++++ storage/rocksdb/rdb_datadic.h | 26 ++++++-- 5 files changed, 149 insertions(+), 10 deletions(-) create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 6e9e7ef4832..e7abac7246b 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - kd.get_first_key(key, &key_size); + int key_start_matching_bytes = kd.get_first_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. 
it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes); m_scan_it->Seek(index_key); m_skip_scan_it_next_call = true; @@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, const rocksdb::Slice &eq_cond) { uint eq_cond_len = eq_cond.size(); - DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE); memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); kd.successor(m_eq_cond_upper_bound, eq_cond_len); memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); @@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); setup_scan_iterator(*m_pk_descr, &table_key, false, - Rdb_key_def::INDEX_NUMBER_SIZE); + key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result new file mode 100644 index 00000000000..4f6702b85a7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result @@ -0,0 +1,62 @@ +# +# Issue #809: Wrong query result with bloom filters +# +create table t1 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t3(seq int); +insert into t3 +select +1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 
B, t2 C, t2 D; +insert t1 +select +(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; +set global rocksdb_force_flush_memtable_now=1; +# Full table scan +explain +select * from t1 limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 +select * from t1 limit 10; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +1000 2000 2000 9999 9999 1000 aaabbbccc +1000 2000 2000 9998 9998 1000 aaabbbccc +1000 2000 2000 9997 9997 1000 aaabbbccc +1000 2000 2000 9996 9996 1000 aaabbbccc +1000 1999 1999 9995 9995 1000 aaabbbccc +1000 1999 1999 9994 9994 1000 aaabbbccc +1000 1999 1999 9993 9993 1000 aaabbbccc +1000 1999 1999 9992 9992 1000 aaabbbccc +1000 1999 1999 9991 9991 1000 aaabbbccc +# An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 122 NULL 1 +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +create table t4 ( +pk int unsigned not null primary key, +kp1 int unsigned not null, +kp2 int unsigned not null, +col1 int unsigned, +key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); +# This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; +pk kp1 kp2 col1 +drop table t1,t2,t3,t4; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt new file mode 100644 index 00000000000..7d63dc74bb8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt @@ -0,0 +1 @@ 
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}}; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test new file mode 100644 index 00000000000..00968aebb62 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test @@ -0,0 +1,61 @@ + +--echo # +--echo # Issue #809: Wrong query result with bloom filters +--echo # + +create table t1 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; + + +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t3(seq int); +insert into t3 +select + 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; + +insert t1 +select + (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # Full table scan +explain +select * from t1 limit 10; +select * from t1 limit 10; + +--echo # An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; + +# A testcase for an assertion that the fix is removing +# The only requirement for the used column family is that it is reverse-ordered +create table t4 ( + pk int unsigned not null primary key, + kp1 int unsigned not null, + kp2 int unsigned not null, + col1 int unsigned, + key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; + +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); + +--echo # This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; + +drop table t1,t2,t3,t4; + + 
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 326570c433d..f97c0d08d29 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -238,12 +238,28 @@ public: *size = INDEX_NUMBER_SIZE; } - /* Get the first key that you need to position at to start iterating. - Returns a "supremum" or "infimum" for this index based on collation order + /* + Get the first key that you need to position at to start iterating. + + Stores into *key a "supremum" or "infimum" key value for the index. + + @return Number of bytes in the key that are usable for bloom filter use. */ - inline void get_first_key(uchar *const key, uint *const size) const { - return m_is_reverse_cf ? get_supremum_key(key, size) - : get_infimum_key(key, size); + inline int get_first_key(uchar *const key, uint *const size) const { + if (m_is_reverse_cf) + get_supremum_key(key, size); + else + get_infimum_key(key, size); + + /* Find out how many bytes of infimum are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) + break; + } + return i; } /* Make a key that is right after the given key. */ From 03edf2ed04dbffe8c413fe0dd2715684e1627371 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 7 May 2018 20:33:14 +0300 Subject: [PATCH 27/54] Undo the incorrect part of commit 7e700bd2a81ae4b37145f1c32bb0902c72856d2d Restore the old code in opt_range.cc --- sql/opt_range.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index bc047b25f30..cc5746925a2 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -11677,8 +11677,8 @@ int QUICK_SELECT_DESC::get_next() end_key.flag= (last_range->flag & NEAR_MAX ? 
HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY); end_key.keypart_map= last_range->max_keypart_map; - result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &end_key, - (last_range->flag & NO_MAX_RANGE) ? NULL : &start_key); + result= file->prepare_range_scan((last_range->flag & NO_MIN_RANGE) ? NULL : &start_key, + (last_range->flag & NO_MAX_RANGE) ? NULL : &end_key); if (result) { DBUG_RETURN(result); From 45ef87ee92904316fb37e1513705cdce263c61b1 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 7 May 2018 22:13:18 +0300 Subject: [PATCH 28/54] Post-merge fixes: fix rocksdb.rocksdb_checksums test (see also: MDEV-15911) --- .../rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt index 792e3808f1e..320c48e5563 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt @@ -1,2 +1,3 @@ --rocksdb_debug_optimizer_n_rows=1000 --rocksdb_records_in_range=50 +--log_warnings=3 From 4d51009a772fdfffc6cc7ae87422a3b0b19fa06a Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 8 May 2018 13:00:26 +0300 Subject: [PATCH 29/54] MyRocks: fix rocksdb.rocksdb_range test attempt 3 --- storage/rocksdb/ha_rocksdb.cc | 17 ++++++++++++++++- storage/rocksdb/ha_rocksdb.h | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index baada708c61..57efc711d61 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -7925,6 +7925,15 @@ int ha_rocksdb::prepare_range_scan(const key_range *start_key, const key_range *end_key) { range_key_part= table->key_info[active_index].key_part; + + if (start_key) + { + m_save_start_range= *start_key; + m_start_range= &m_save_start_range; + } + else + 
m_start_range= NULL; + set_end_range(end_key); return 0; } @@ -8054,8 +8063,12 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, packed_size); uint end_key_packed_size = 0; + const key_range *cur_end_key= end_key; + if (find_flag == HA_READ_PREFIX_LAST_OR_PREV) + cur_end_key= m_start_range; + const uint eq_cond_len = - calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, end_key, + calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, cur_end_key, &end_key_packed_size); bool use_all_keys = false; @@ -9996,6 +10009,7 @@ int ha_rocksdb::index_init(uint idx, bool sorted) { DBUG_ASSERT(tx != nullptr); setup_read_decoders(); + m_start_range= NULL; if (!m_keyread_only) { m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); @@ -10024,6 +10038,7 @@ int ha_rocksdb::index_end() { active_index = MAX_KEY; in_range_check_pushed_down = FALSE; + m_start_range= NULL; DBUG_RETURN(HA_EXIT_SUCCESS); } diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 0b008a64390..6083dd73020 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -493,6 +493,10 @@ class ha_rocksdb : public my_core::handler { /* Iterator used for range scans and for full table/index scans */ rocksdb::Iterator *m_scan_it; + + /* Same as handler::end_key but for start. 
Reverse-ordered scans need it */ + key_range m_save_start_range; + const key_range *m_start_range; /* Whether m_scan_it was created with skip_bloom=true */ bool m_scan_it_skips_bloom; From 7d521bcc461acbe9e8cf9dd70ce0fb837de4bcb8 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Wed, 9 May 2018 17:43:08 -0400 Subject: [PATCH 30/54] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 5fe6561ce82..f9ccf7c6188 100644 --- a/VERSION +++ b/VERSION @@ -1,3 +1,3 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=1 -MYSQL_VERSION_PATCH=33 +MYSQL_VERSION_PATCH=34 From 318097bb8f6e12c546b5dcd287416158209dbb39 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Thu, 10 May 2018 19:00:54 +0400 Subject: [PATCH 31/54] MDEV-15480 Audit plugin does not respect QUERY_DML for audit plugin. QUERY_DML_NO_SELECT flag added. --- .../suite/plugins/r/server_audit.result | 12 ++++++++ mysql-test/suite/plugins/t/server_audit.test | 7 +++++ plugin/server_audit/server_audit.c | 29 ++++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 80b434553e5..2e18a489dc7 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -182,6 +182,17 @@ select 2; 2 2 drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +id +1 +2 +select 2; +2 +2 +drop table t1; set global server_audit_events=''; set global server_audit_query_log_limit= 15; select (1), (2), (3), (4); @@ -332,6 +343,7 @@ TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD \n# comment\nFOR u1 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD FOR u1=',ID TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'CREATE USER u3 IDENTIFIED BY *****',0 
TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'drop user u1, u2, u3',0 +TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'insert into t1 values (1), (2)',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global server_audit_events=\'\'',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global serv',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'select (1), (2)',0 diff --git a/mysql-test/suite/plugins/t/server_audit.test b/mysql-test/suite/plugins/t/server_audit.test index 6c5eaffd9a2..4af1ed883e3 100644 --- a/mysql-test/suite/plugins/t/server_audit.test +++ b/mysql-test/suite/plugins/t/server_audit.test @@ -121,6 +121,13 @@ select 2; /*! select 2*/; /*comment*/ select 2; drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +select 2; +drop table t1; + set global server_audit_events=''; set global server_audit_query_log_limit= 15; diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index 9a3418e7784..d27da280a2a 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -15,7 +15,7 @@ #define PLUGIN_VERSION 0x104 -#define PLUGIN_STR_VERSION "1.4.3" +#define PLUGIN_STR_VERSION "1.4.4" #define _my_thread_var loc_thread_var @@ -366,16 +366,17 @@ static MYSQL_SYSVAR_STR(excl_users, excl_users, PLUGIN_VAR_RQCMDARG, /* bits in the event filter. 
*/ #define EVENT_CONNECT 1 #define EVENT_QUERY_ALL 2 -#define EVENT_QUERY 58 +#define EVENT_QUERY 122 #define EVENT_TABLE 4 #define EVENT_QUERY_DDL 8 #define EVENT_QUERY_DML 16 #define EVENT_QUERY_DCL 32 +#define EVENT_QUERY_DML_NO_SELECT 64 static const char *event_names[]= { "CONNECT", "QUERY", "TABLE", "QUERY_DDL", "QUERY_DML", "QUERY_DCL", - NULL + "QUERY_DML_NO_SELECT", NULL }; static TYPELIB events_typelib= { @@ -383,7 +384,7 @@ static TYPELIB events_typelib= }; static MYSQL_SYSVAR_SET(events, events, PLUGIN_VAR_RQCMDARG, "Specifies the set of events to monitor. Can be CONNECT, QUERY, TABLE," - " QUERY_DDL, QUERY_DML, QUERY_DCL.", + " QUERY_DDL, QUERY_DML, QUERY_DML_NO_SELECT, QUERY_DCL.", NULL, NULL, 0, &events_typelib); #define OUTPUT_SYSLOG 0 #define OUTPUT_FILE 1 @@ -857,6 +858,21 @@ struct sa_keyword dml_keywords[]= }; +struct sa_keyword dml_no_select_keywords[]= +{ + {2, "DO", 0, SQLCOM_DML}, + {4, "CALL", 0, SQLCOM_DML}, + {4, "LOAD", &data_word, SQLCOM_DML}, + {4, "LOAD", &xml_word, SQLCOM_DML}, + {6, "DELETE", 0, SQLCOM_DML}, + {6, "INSERT", 0, SQLCOM_DML}, + {6, "UPDATE", 0, SQLCOM_DML}, + {7, "HANDLER", 0, SQLCOM_DML}, + {7, "REPLACE", 0, SQLCOM_DML}, + {0, NULL, 0, SQLCOM_DML} +}; + + struct sa_keyword dcl_keywords[]= { {6, "CREATE", &user_word, SQLCOM_DCL}, @@ -1637,6 +1653,11 @@ static int log_statement_ex(const struct connection_info *cn, if (filter_query_type(query, dml_keywords)) goto do_log_query; } + if (events & EVENT_QUERY_DML_NO_SELECT) + { + if (filter_query_type(query, dml_no_select_keywords)) + goto do_log_query; + } if (events & EVENT_QUERY_DCL) { if (filter_query_type(query, dcl_keywords)) From 3cbfe8cc47d48ff2c528149b4866480b50d8a116 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Thu, 10 May 2018 19:00:54 +0400 Subject: [PATCH 32/54] MDEV-15480 Audit plugin does not respect QUERY_DML for audit plugin. QUERY_DML_NO_SELECT flag added. 
--- .../suite/plugins/r/server_audit.result | 12 ++++++++ mysql-test/suite/plugins/t/server_audit.test | 7 +++++ plugin/server_audit/server_audit.c | 29 ++++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 3971504b238..5c355b34ba1 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -182,6 +182,17 @@ select 2; 2 2 drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +id +1 +2 +select 2; +2 +2 +drop table t1; set global server_audit_events=''; set global server_audit_query_log_limit= 15; select (1), (2), (3), (4); @@ -343,6 +354,7 @@ TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD \n# comment\nFOR u1 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD FOR u1=',ID TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'CREATE USER u3 IDENTIFIED BY *****',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'drop user u1, u2, u3',0 +TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'insert into t1 values (1), (2)',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global server_audit_events=\'\'',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global serv',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'select (1), (2)',0 diff --git a/mysql-test/suite/plugins/t/server_audit.test b/mysql-test/suite/plugins/t/server_audit.test index 6c5eaffd9a2..4af1ed883e3 100644 --- a/mysql-test/suite/plugins/t/server_audit.test +++ b/mysql-test/suite/plugins/t/server_audit.test @@ -121,6 +121,13 @@ select 2; /*! 
select 2*/; /*comment*/ select 2; drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +select 2; +drop table t1; + set global server_audit_events=''; set global server_audit_query_log_limit= 15; diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index 323179d5f84..17d3154089f 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -15,7 +15,7 @@ #define PLUGIN_VERSION 0x104 -#define PLUGIN_STR_VERSION "1.4.3" +#define PLUGIN_STR_VERSION "1.4.4" #define _my_thread_var loc_thread_var @@ -364,16 +364,17 @@ static MYSQL_SYSVAR_STR(excl_users, excl_users, PLUGIN_VAR_RQCMDARG, /* bits in the event filter. */ #define EVENT_CONNECT 1 #define EVENT_QUERY_ALL 2 -#define EVENT_QUERY 58 +#define EVENT_QUERY 122 #define EVENT_TABLE 4 #define EVENT_QUERY_DDL 8 #define EVENT_QUERY_DML 16 #define EVENT_QUERY_DCL 32 +#define EVENT_QUERY_DML_NO_SELECT 64 static const char *event_names[]= { "CONNECT", "QUERY", "TABLE", "QUERY_DDL", "QUERY_DML", "QUERY_DCL", - NULL + "QUERY_DML_NO_SELECT", NULL }; static TYPELIB events_typelib= { @@ -381,7 +382,7 @@ static TYPELIB events_typelib= }; static MYSQL_SYSVAR_SET(events, events, PLUGIN_VAR_RQCMDARG, "Specifies the set of events to monitor. 
Can be CONNECT, QUERY, TABLE," - " QUERY_DDL, QUERY_DML, QUERY_DCL.", + " QUERY_DDL, QUERY_DML, QUERY_DML_NO_SELECT, QUERY_DCL.", NULL, NULL, 0, &events_typelib); #define OUTPUT_SYSLOG 0 #define OUTPUT_FILE 1 @@ -855,6 +856,21 @@ struct sa_keyword dml_keywords[]= }; +struct sa_keyword dml_no_select_keywords[]= +{ + {2, "DO", 0, SQLCOM_DML}, + {4, "CALL", 0, SQLCOM_DML}, + {4, "LOAD", &data_word, SQLCOM_DML}, + {4, "LOAD", &xml_word, SQLCOM_DML}, + {6, "DELETE", 0, SQLCOM_DML}, + {6, "INSERT", 0, SQLCOM_DML}, + {6, "UPDATE", 0, SQLCOM_DML}, + {7, "HANDLER", 0, SQLCOM_DML}, + {7, "REPLACE", 0, SQLCOM_DML}, + {0, NULL, 0, SQLCOM_DML} +}; + + struct sa_keyword dcl_keywords[]= { {6, "CREATE", &user_word, SQLCOM_DCL}, @@ -1636,6 +1652,11 @@ static int log_statement_ex(const struct connection_info *cn, if (filter_query_type(query, dml_keywords)) goto do_log_query; } + if (events & EVENT_QUERY_DML_NO_SELECT) + { + if (filter_query_type(query, dml_no_select_keywords)) + goto do_log_query; + } if (events & EVENT_QUERY_DCL) { if (filter_query_type(query, dcl_keywords)) From dab4abbb09d1b204a64187c17de88234da3071a6 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Thu, 10 May 2018 19:00:54 +0400 Subject: [PATCH 33/54] MDEV-15480 Audit plugin does not respect QUERY_DML for audit plugin. QUERY_DML_NO_SELECT flag added. 
--- .../suite/plugins/r/server_audit.result | 12 ++++++++ mysql-test/suite/plugins/t/server_audit.test | 7 +++++ plugin/server_audit/server_audit.c | 29 ++++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 3971504b238..5c355b34ba1 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -182,6 +182,17 @@ select 2; 2 2 drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +id +1 +2 +select 2; +2 +2 +drop table t1; set global server_audit_events=''; set global server_audit_query_log_limit= 15; select (1), (2), (3), (4); @@ -343,6 +354,7 @@ TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD \n# comment\nFOR u1 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'SET PASSWORD FOR u1=',ID TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'CREATE USER u3 IDENTIFIED BY *****',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'drop user u1, u2, u3',0 +TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'insert into t1 values (1), (2)',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global server_audit_events=\'\'',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'set global serv',0 TIME,HOSTNAME,root,localhost,ID,ID,QUERY,sa_db,'select (1), (2)',0 diff --git a/mysql-test/suite/plugins/t/server_audit.test b/mysql-test/suite/plugins/t/server_audit.test index 6c5eaffd9a2..4af1ed883e3 100644 --- a/mysql-test/suite/plugins/t/server_audit.test +++ b/mysql-test/suite/plugins/t/server_audit.test @@ -121,6 +121,13 @@ select 2; /*! 
select 2*/; /*comment*/ select 2; drop table t1; +set global server_audit_events='query_dml_no_select'; +create table t1(id int); +insert into t1 values (1), (2); +select * from t1; +select 2; +drop table t1; + set global server_audit_events=''; set global server_audit_query_log_limit= 15; diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index 0a266ab19fe..b5f6e996b42 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -15,7 +15,7 @@ #define PLUGIN_VERSION 0x104 -#define PLUGIN_STR_VERSION "1.4.3" +#define PLUGIN_STR_VERSION "1.4.4" #define _my_thread_var loc_thread_var @@ -364,16 +364,17 @@ static MYSQL_SYSVAR_STR(excl_users, excl_users, PLUGIN_VAR_RQCMDARG, /* bits in the event filter. */ #define EVENT_CONNECT 1 #define EVENT_QUERY_ALL 2 -#define EVENT_QUERY 58 +#define EVENT_QUERY 122 #define EVENT_TABLE 4 #define EVENT_QUERY_DDL 8 #define EVENT_QUERY_DML 16 #define EVENT_QUERY_DCL 32 +#define EVENT_QUERY_DML_NO_SELECT 64 static const char *event_names[]= { "CONNECT", "QUERY", "TABLE", "QUERY_DDL", "QUERY_DML", "QUERY_DCL", - NULL + "QUERY_DML_NO_SELECT", NULL }; static TYPELIB events_typelib= { @@ -381,7 +382,7 @@ static TYPELIB events_typelib= }; static MYSQL_SYSVAR_SET(events, events, PLUGIN_VAR_RQCMDARG, "Specifies the set of events to monitor. 
Can be CONNECT, QUERY, TABLE," - " QUERY_DDL, QUERY_DML, QUERY_DCL.", + " QUERY_DDL, QUERY_DML, QUERY_DML_NO_SELECT, QUERY_DCL.", NULL, NULL, 0, &events_typelib); #define OUTPUT_SYSLOG 0 #define OUTPUT_FILE 1 @@ -855,6 +856,21 @@ struct sa_keyword dml_keywords[]= }; +struct sa_keyword dml_no_select_keywords[]= +{ + {2, "DO", 0, SQLCOM_DML}, + {4, "CALL", 0, SQLCOM_DML}, + {4, "LOAD", &data_word, SQLCOM_DML}, + {4, "LOAD", &xml_word, SQLCOM_DML}, + {6, "DELETE", 0, SQLCOM_DML}, + {6, "INSERT", 0, SQLCOM_DML}, + {6, "UPDATE", 0, SQLCOM_DML}, + {7, "HANDLER", 0, SQLCOM_DML}, + {7, "REPLACE", 0, SQLCOM_DML}, + {0, NULL, 0, SQLCOM_DML} +}; + + struct sa_keyword dcl_keywords[]= { {6, "CREATE", &user_word, SQLCOM_DCL}, @@ -1636,6 +1652,11 @@ static int log_statement_ex(const struct connection_info *cn, if (filter_query_type(query, dml_keywords)) goto do_log_query; } + if (events & EVENT_QUERY_DML_NO_SELECT) + { + if (filter_query_type(query, dml_no_select_keywords)) + goto do_log_query; + } if (events & EVENT_QUERY_DCL) { if (filter_query_type(query, dcl_keywords)) From b0269816a52d7fb3d8893afbafc60b27aca30815 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 10 May 2018 19:05:13 +0300 Subject: [PATCH 34/54] MyRocks: post-merge fixes for Windows: take into account FN_LIBCHAR2 Table name may be passed either as "./db/table" or as ".\\db\\table". --- storage/rocksdb/ha_rocksdb.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 57efc711d61..46d963860a2 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -7141,12 +7141,17 @@ int rdb_normalize_tablename(const std::string &tablename, std::string *const strbuf) { DBUG_ASSERT(strbuf != nullptr); - if (tablename.size() < 2 || tablename[0] != '.' || tablename[1] != FN_LIBCHAR) { + if (tablename.size() < 2 || tablename[0] != '.' 
|| + (tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) { DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; } size_t pos = tablename.find_first_of(FN_LIBCHAR, 2); + if (pos == std::string::npos) { + pos = tablename.find_first_of(FN_LIBCHAR2, 2); + } + if (pos == std::string::npos) { DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; From ffb48234df38630aeb3c34d72aba1473348c7076 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 10 May 2018 20:16:15 +0300 Subject: [PATCH 35/54] MyRocks on windows: make bulk_load_unsorted pass. It produced warnings due to perl code printing \r\n into the text file which is then used by LOAD DATA INFILE. --- .../rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc index 4a3158e814c..151ec0ccf14 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -83,6 +83,7 @@ eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") perl; my $fn = $ENV{'ROCKSDB_INFILE'}; open(my $fh, '>', $fn) || die "perl open($fn): $!"; +binmode $fh; my $max = 5000000; my $sign = 1; for (my $ii = 0; $ii < $max; $ii++) From e5bd75fb4e246b9f0494aab05dd557b18d5921cd Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 10 May 2018 21:46:57 +0300 Subject: [PATCH 36/54] MyRocks: disable rocksdb.check_ignore_unknown_options on Windows --- .../mysql-test/rocksdb/t/check_ignore_unknown_options.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test index b4866de4d3b..3316e7ea987 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test @@ -1,3 +1,7 @@ +# MariaDB: "xargs" is not present on windows builders. +# we could work around this but this is not a priority. +--source include/not_windows.inc + --disable_warnings let $MYSQLD_DATADIR= `select @@datadir`; let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; From 16319409bf53ffb9860d82778ae0997a7a19d381 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 8 May 2018 15:00:17 +0530 Subject: [PATCH 37/54] MDEV-15853: Assertion `tab->filesort_result == 0' failed The issue here is that the window function execution is not called for the correct join tab, when we have GROUP BY where we create extra temporary tables then we need to call window function execution for the last join tab. For doing so the current code does not take into account the JOIN::aggr_tables. Fixed by introducing a new function JOIN::total_join_tab_cnt that takes in account the temporary tables also. 
--- mysql-test/r/win.result | 15 +++++++++++++++ mysql-test/t/win.test | 15 +++++++++++++++ sql/sql_select.cc | 2 +- sql/sql_select.h | 9 +++++++++ sql/sql_window.cc | 2 +- 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result index e3cb40e8343..b519b2bb223 100644 --- a/mysql-test/r/win.result +++ b/mysql-test/r/win.result @@ -3299,3 +3299,18 @@ ROW_NUMBER() OVER() i SELECT ROW_NUMBER() OVER(), i FROM t1 WHERE 0; ROW_NUMBER() OVER() i DROP TABLE t1; +# +# MDEV-15853: Assertion `tab->filesort_result == 0' failed +# +CREATE TABLE t1 ( a1 int); +insert into t1 values (1),(2),(3); +CREATE TABLE t2 (b1 int, a1 int, a2 int); +insert into t2 values (1,2,3),(2,3,4),(3,4,5); +SELECT COUNT(DISTINCT t2.a2), +rank() OVER (ORDER BY t2.b1) +FROM t2 ,t1 GROUP BY t2.b1 ORDER BY t1.a1; +COUNT(DISTINCT t2.a2) rank() OVER (ORDER BY t2.b1) +1 1 +1 2 +1 3 +DROP TABLE t1,t2; diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test index 95ffb6d9909..b354a55d0d6 100644 --- a/mysql-test/t/win.test +++ b/mysql-test/t/win.test @@ -2067,3 +2067,18 @@ SELECT DISTINCT ROW_NUMBER() OVER(), i FROM t1 WHERE 0; SELECT ROW_NUMBER() OVER(), i FROM t1 WHERE 0; DROP TABLE t1; +--echo # +--echo # MDEV-15853: Assertion `tab->filesort_result == 0' failed +--echo # + +CREATE TABLE t1 ( a1 int); +insert into t1 values (1),(2),(3); + +CREATE TABLE t2 (b1 int, a1 int, a2 int); +insert into t2 values (1,2,3),(2,3,4),(3,4,5); + +--sorted_result +SELECT COUNT(DISTINCT t2.a2), + rank() OVER (ORDER BY t2.b1) +FROM t2 ,t1 GROUP BY t2.b1 ORDER BY t1.a1; +DROP TABLE t1,t2; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 4df4c0c87e1..4cd4754596b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2851,7 +2851,7 @@ bool JOIN::make_aggr_tables_info() - duplicate value removal Both of these operations are done after window function computation step. 
*/ - curr_tab= join_tab + exec_join_tab_cnt() + aggr_tables - 1; + curr_tab= join_tab + total_join_tab_cnt(); if (select_lex->window_funcs.elements) { curr_tab->window_funcs_step= new Window_funcs_computation; diff --git a/sql/sql_select.h b/sql/sql_select.h index 2003cc1211b..a134d601c76 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1503,6 +1503,15 @@ public: /* Number of tables actually joined at the top level */ uint exec_join_tab_cnt() { return tables_list ? top_join_tab_count : 0; } + /* + Number of tables in the join which also includes the temporary tables + created for GROUP BY, DISTINCT , WINDOW FUNCTION etc. + */ + uint total_join_tab_cnt() + { + return exec_join_tab_cnt() + aggr_tables - 1; + } + int prepare(TABLE_LIST *tables, uint wind_num, COND *conds, uint og_num, ORDER *order, bool skip_order_by, ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select, diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 4e1e64365ae..cd09e174808 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -2754,7 +2754,7 @@ bool Window_func_runner::exec(THD *thd, TABLE *tbl, SORT_INFO *filesort_result) bool Window_funcs_sort::exec(JOIN *join) { THD *thd= join->thd; - JOIN_TAB *join_tab= join->join_tab + join->exec_join_tab_cnt(); + JOIN_TAB *join_tab= join->join_tab + join->total_join_tab_cnt(); /* Sort the table based on the most specific sorting criteria of the window functions. 
*/ From c686483264119ac636327e2c09db69c29953bbf9 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Fri, 11 May 2018 13:55:03 +0300 Subject: [PATCH 38/54] MDEV-12427: rocksdb.write_sync fails Enable the test as the cause of the failure has been fixed --- storage/rocksdb/mysql-test/rocksdb/t/write_sync.test | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test index e97a0b0bcc9..7c30d4fcbdb 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test @@ -1,5 +1,4 @@ --source include/have_rocksdb.inc ---source include/not_windows.inc # MDEV-12427 SET GLOBAL rocksdb_write_disable_wal=false; SET GLOBAL rocksdb_write_ignore_missing_column_families=true; From 580a8061a752946cfd9d6e57f88d46ce099d11c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 13:48:57 +0300 Subject: [PATCH 39/54] Remove a redundant condition added by the 5.6.40 merge When Oracle fixed MDEV-13899 in their own way, they moved the condition to the only caller of PageConverter::update_records(). Thus, the merge of 5.6.40 into MariaDB added a redundant condition. PageConverter::update_records(): Move the page_is_leaf() condition to the only caller, PageConverter::update_index_page(). 
--- storage/innobase/row/row0import.cc | 10 +--------- storage/xtradb/row/row0import.cc | 6 +----- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index df696f3188d..f93d004d8f9 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1820,10 +1820,6 @@ PageConverter::update_records( m_rec_iter.open(block); - if (!page_is_leaf(block->frame)) { - return DB_SUCCESS; - } - while (!m_rec_iter.end()) { rec_t* rec = m_rec_iter.current(); @@ -1928,11 +1924,7 @@ PageConverter::update_index_page( return(DB_SUCCESS); } - if (!page_is_leaf(block->frame)) { - return (DB_SUCCESS); - } - - return(update_records(block)); + return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS; } /** diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc index 26ed99a483c..39e450bd7c5 100644 --- a/storage/xtradb/row/row0import.cc +++ b/storage/xtradb/row/row0import.cc @@ -1820,10 +1820,6 @@ PageConverter::update_records( m_rec_iter.open(block); - if (!page_is_leaf(block->frame)) { - return DB_SUCCESS; - } - while (!m_rec_iter.end()) { rec_t* rec = m_rec_iter.current(); ibool deleted = rec_get_deleted_flag(rec, comp); @@ -1927,7 +1923,7 @@ PageConverter::update_index_page( return(DB_SUCCESS); } - return(update_records(block)); + return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS; } /** From 9c03ba8f0d8e579432671d5bdde5f1b5aca3954c Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Wed, 27 Dec 2017 11:56:11 +0530 Subject: [PATCH 40/54] Bug #27041445 SERVER ABORTS IF FTS_DOC_ID EXCEEDS FTS_DOC_ID_MAX_STEP Problem: ======= Multiple insert statement in table contains FULLTEXT KEY and a FTS_DOC_ID column aborts the server if the FTS_DOC_ID exceeds FTS_DOC_ID_MAX_STEP. Solution: ======== Remove the exception for first committed insert statement. 
Reviewed-by: Jimmy Yang RB: 18023 --- .../{innodb-fts-basic.result => basic.result} | 34 ++++++++++++++++ .../t/{innodb-fts-basic.test => basic.test} | 39 ++++++++++++++++--- storage/xtradb/row/row0mysql.cc | 3 +- 3 files changed, 69 insertions(+), 7 deletions(-) rename mysql-test/suite/innodb_fts/r/{innodb-fts-basic.result => basic.result} (89%) rename mysql-test/suite/innodb_fts/t/{innodb-fts-basic.test => basic.test} (85%) diff --git a/mysql-test/suite/innodb_fts/r/innodb-fts-basic.result b/mysql-test/suite/innodb_fts/r/basic.result similarity index 89% rename from mysql-test/suite/innodb_fts/r/innodb-fts-basic.result rename to mysql-test/suite/innodb_fts/r/basic.result index fe767476fe6..ae23b93dc84 100644 --- a/mysql-test/suite/innodb_fts/r/innodb-fts-basic.result +++ b/mysql-test/suite/innodb_fts/r/basic.result @@ -257,3 +257,37 @@ WHERE MATCH (title,body) AGAINST ('"more test proximity"' IN BOOLEAN MODE); id title body drop table articles; +# +# Bug #22679185 INVALID INNODB FTS DOC ID DURING INSERT +# +create table t1 (f1 int not null primary key, f2 varchar(100), +FTS_DOC_ID bigint(20) unsigned not null, +unique key `FTS_DOC_ID_INDEX` (`FTS_DOC_ID`), +fulltext key (f2))engine=innodb; +insert into t1 values(1, "This is the first record", 20000); +insert into t1 values(2, "This is the second record", 40000); +select FTS_DOC_ID from t1; +FTS_DOC_ID +20000 +40000 +drop table t1; +create table t1 (f1 int not null primary key, f2 varchar(100), +FTS_DOC_ID bigint(20) unsigned not null auto_increment, +unique key `FTS_DOC_ID_INDEX` (`FTS_DOC_ID`), +fulltext key (f2))engine=innodb; +set auto_increment_increment = 65535; +insert into t1(f1, f2) values(1, "This is the first record"); +insert into t1(f1, f2) values(2, "This is the second record"); +insert into t1(f1, f2) values(3, "This is the third record"); +select FTS_DOC_ID from t1; +FTS_DOC_ID +1 +65536 +131071 +drop table t1; +call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. 
Its difference with largest used Doc ID 0 cannot exceed or equal to 65535"); +CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB; +INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000); +ERROR HY000: Invalid InnoDB FTS Doc ID +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_fts/t/innodb-fts-basic.test b/mysql-test/suite/innodb_fts/t/basic.test similarity index 85% rename from mysql-test/suite/innodb_fts/t/innodb-fts-basic.test rename to mysql-test/suite/innodb_fts/t/basic.test index 095713130f1..58f36be08a5 100644 --- a/mysql-test/suite/innodb_fts/t/innodb-fts-basic.test +++ b/mysql-test/suite/innodb_fts/t/basic.test @@ -2,11 +2,6 @@ -- source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - # Create FTS table CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, @@ -226,3 +221,37 @@ SELECT * FROM articles AGAINST ('"more test proximity"' IN BOOLEAN MODE); drop table articles; + +--echo # +--echo # Bug #22679185 INVALID INNODB FTS DOC ID DURING INSERT +--echo # + +create table t1 (f1 int not null primary key, f2 varchar(100), + FTS_DOC_ID bigint(20) unsigned not null, + unique key `FTS_DOC_ID_INDEX` (`FTS_DOC_ID`), + fulltext key (f2))engine=innodb; + +insert into t1 values(1, "This is the first record", 20000); +insert into t1 values(2, "This is the second record", 40000); +select FTS_DOC_ID from t1; +drop table t1; + + +create table t1 (f1 int not null primary key, f2 varchar(100), + FTS_DOC_ID bigint(20) unsigned not null auto_increment, + unique key `FTS_DOC_ID_INDEX` (`FTS_DOC_ID`), + fulltext key (f2))engine=innodb; + +set auto_increment_increment = 65535; +insert into t1(f1, f2) values(1, "This is the first record"); +insert into t1(f1, f2) values(2, "This is the second record"); +insert into t1(f1, f2) 
values(3, "This is the third record"); +select FTS_DOC_ID from t1; +drop table t1; + +call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535"); +CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB; +--error 182 +INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000); +DROP TABLE t1; diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 7a4c5e114c1..838506c8786 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1442,8 +1442,7 @@ error_exit: doc_ids difference should not exceed FTS_DOC_ID_MAX_STEP value. */ - if (next_doc_id > 1 - && doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { + if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { fprintf(stderr, "InnoDB: Doc ID " UINT64PF " is too" " big. Its difference with largest" From 197bf0fe35efb148c4e751e1b695786d61238e8e Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Thu, 22 Feb 2018 18:45:38 +0530 Subject: [PATCH 41/54] Bug #26334149 - MYSQL CRASHES WHEN FULL TEXT INDEXES IBD FILES ARE ORPHANED DUE TO RENAME TABLE Problem: When FTS index is added into a table which doesn't have 'FTS_DOC_ID' column, Innodb rebuilds table to add column 'FTS_DOC_ID'. When this FTS index is dropped from this table, Innodb doesn't rebuild the table to remove the 'FTS_DOC_ID' column and deletes FTS index auxiliary tables. But it doesn't delete FTS common auxiliary tables. Later when the database having this table is renamed, FTS auxiliary tables are not renamed because table's flags2 (dict_table_t.flags2) has been reset for the DICT_TF2_FTS flag during the FTS index drop operation. Now when we drop the old database, it leads to an assert. Fix: During renaming of FTS auxiliary tables, ORed a condition to check if table has DICT_TF2_FTS_HAS_DOC_ID flag set.
RB: 18769 Reviewed by : Jimmy.Yang@oracle.com --- mysql-test/suite/innodb/r/innodb-alter.result | 29 +++++++++++++++++++ mysql-test/suite/innodb/t/innodb-alter.test | 21 ++++++++++++++ storage/xtradb/row/row0mysql.cc | 5 ++-- 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/innodb/r/innodb-alter.result b/mysql-test/suite/innodb/r/innodb-alter.result index 0bad6c5e0c1..bd82cc8a764 100644 --- a/mysql-test/suite/innodb/r/innodb-alter.result +++ b/mysql-test/suite/innodb/r/innodb-alter.result @@ -859,3 +859,32 @@ DROP TABLE dest_db.t1; DROP TABLE source_db.t1; DROP DATABASE source_db; DROP DATABASE dest_db; +# +# BUG #26334149 MYSQL CRASHES WHEN FULL TEXT INDEXES IBD FILES ARE +# ORPHANED DUE TO RENAME TABLE +# +CREATE DATABASE db1; +USE db1; +CREATE TABLE notes ( +id int(11) NOT NULL AUTO_INCREMENT, +body text COLLATE utf8_unicode_ci, +PRIMARY KEY (id) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 +COLLATE=utf8_unicode_ci +ROW_FORMAT=COMPRESSED; +Warnings: +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +ALTER TABLE notes ADD FULLTEXT INDEX index_ft_body (body(255)); +Warnings: +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID +DROP INDEX index_ft_body ON notes; +Warnings: +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. 
+CREATE DATABASE db2; +RENAME TABLE db1.notes TO db2.notes; +DROP DATABASE db1; +DROP DATABASE db2; diff --git a/mysql-test/suite/innodb/t/innodb-alter.test b/mysql-test/suite/innodb/t/innodb-alter.test index af75482703c..0d3cc6f1733 100644 --- a/mysql-test/suite/innodb/t/innodb-alter.test +++ b/mysql-test/suite/innodb/t/innodb-alter.test @@ -481,3 +481,24 @@ eval ALTER TABLE $source_db.t1 DROP INDEX index2, algorithm=inplace; eval DROP TABLE $source_db.t1; eval DROP DATABASE $source_db; eval DROP DATABASE $dest_db; + +--echo # +--echo # BUG #26334149 MYSQL CRASHES WHEN FULL TEXT INDEXES IBD FILES ARE +--echo # ORPHANED DUE TO RENAME TABLE +--echo # +CREATE DATABASE db1; USE db1; +CREATE TABLE notes ( + id int(11) NOT NULL AUTO_INCREMENT, + body text COLLATE utf8_unicode_ci, + PRIMARY KEY (id) + ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 +COLLATE=utf8_unicode_ci +ROW_FORMAT=COMPRESSED; + +ALTER TABLE notes ADD FULLTEXT INDEX index_ft_body (body(255)); +DROP INDEX index_ft_body ON notes; + +CREATE DATABASE db2; +RENAME TABLE db1.notes TO db2.notes; +DROP DATABASE db1; +DROP DATABASE db2; diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 838506c8786..cb496965f01 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, 2018, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under @@ -5168,7 +5168,8 @@ row_rename_table_for_mysql( } } - if (dict_table_has_fts_index(table) + if ((dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) && !dict_tables_have_same_db(old_name, new_name)) { err = fts_rename_aux_tables(table, new_name, trx); if (err != DB_TABLE_NOT_FOUND) { From 64f4576be440a766e746a42e0429f0fb6a6d4a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 10:08:00 +0300 Subject: [PATCH 42/54] MDEV-15697 post-fix: Remove an unused variable --- storage/spider/spd_param.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/spider/spd_param.cc b/storage/spider/spd_param.cc index 4ef85ec485f..71b43cae4d4 100644 --- a/storage/spider/spd_param.cc +++ b/storage/spider/spd_param.cc @@ -928,7 +928,6 @@ bool spider_param_use_default_database( DBUG_RETURN(THDVAR(thd, use_default_database)); } -static int spider_internal_sql_log_off; /* -1 :don't know or does not matter; don't send 'SET SQL_LOG_OFF' statement 0 :do send 'SET SQL_LOG_OFF 0' statement to data nodes From c407ee0976bb10d64a902bb4df36b30b3532012a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 16:24:51 +0300 Subject: [PATCH 43/54] =?UTF-8?q?MDEV-16145=20Crash=20in=20ALTER=20TABLE?= =?UTF-8?q?=E2=80=A6AUTO=5FINCREMENT=3D1=20after=20DISCARD=20TABLESPACE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the MariaDB equivalent of fixing the MySQL 5.7 regression Bug #26935001 ALTER TABLE AUTO_INCREMENT TRIES TO READ INDEX FROM DISCARDED TABLESPACE Oracle did not publish a test case, but it is easy to guess based on the commit message. The MariaDB code is different due to MDEV-6076 implementing persistent AUTO_INCREMENT. commit_set_autoinc(): Report ER_TABLESPACE_DISCARDED if the tablespace is missing. 
prepare_inplace_alter_table_dict(): Avoid accessing a discarded tablespace. (This avoids generating warnings in fil_space_acquire().) --- .../innodb/r/alter_missing_tablespace.result | 23 ++++++++++++--- .../innodb/t/alter_missing_tablespace.test | 22 +++++++++++++-- storage/innobase/handler/handler0alter.cc | 28 +++++++++++++------ 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/mysql-test/suite/innodb/r/alter_missing_tablespace.result b/mysql-test/suite/innodb/r/alter_missing_tablespace.result index 1517afd1a39..237d0df26ff 100644 --- a/mysql-test/suite/innodb/r/alter_missing_tablespace.result +++ b/mysql-test/suite/innodb/r/alter_missing_tablespace.result @@ -3,8 +3,10 @@ # OR DISCARDED TABLESPACES # SET GLOBAL innodb_file_per_table=1; -CREATE TABLE t(a INT)ENGINE=InnoDB; +CREATE TABLE t(a SERIAL)ENGINE=InnoDB; CREATE TABLE `x..d` (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t1(a SERIAL)ENGINE=InnoDB; +INSERT INTO t1 VALUES(1),(2),(3); SELECT * FROM t; ERROR 42S02: Table 'test.t' doesn't exist in engine ALTER TABLE t ADD INDEX (a), ALGORITHM=INPLACE; @@ -13,11 +15,16 @@ SHOW WARNINGS; Level Code Message Warning 1812 Tablespace is missing for table 'test/t' Error 1932 Table 'test.t' doesn't exist in engine -ALTER TABLE t1 ADD INDEX (a), ALGORITHM=COPY; -ERROR 42S02: Table 'test.t1' doesn't exist +ALTER TABLE t ADD INDEX (a), ALGORITHM=COPY; +ERROR 42S02: Table 'test.t' doesn't exist in engine SHOW WARNINGS; Level Code Message -Error 1146 Table 'test.t1' doesn't exist +Warning 1812 Tablespace is missing for table 'test/t' +Error 1932 Table 'test.t' doesn't exist in engine +ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=INPLACE; +ERROR 42S02: Table 'test.t' doesn't exist in engine +ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=COPY; +ERROR 42S02: Table 'test.t' doesn't exist in engine ALTER TABLE t ALGORITHM=INPLACE, DISCARD TABLESPACE; ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server 
version for the right syntax to use near 'DISCARD TABLESPACE' at line 1 ALTER TABLE t ALGORITHM=COPY, DISCARD TABLESPACE; @@ -32,3 +39,11 @@ DROP TABLE t; SELECT * FROM `x..d`; ERROR 42S02: Table 'test.x..d' doesn't exist in engine DROP TABLE `x..d`; +ALTER TABLE t1 DISCARD TABLESPACE; +ALTER TABLE t1 AUTO_INCREMENT=1, ALGORITHM=INPLACE; +ERROR HY000: Tablespace has been discarded for table `t1` +ALTER TABLE t1 AUTO_INCREMENT=1, FORCE, ALGORITHM=INPLACE; +ERROR HY000: Tablespace has been discarded for table `t1` +ALTER TABLE t1 AUTO_INCREMENT=1, ALGORITHM=COPY; +ERROR HY000: Tablespace has been discarded for table `t1` +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/alter_missing_tablespace.test b/mysql-test/suite/innodb/t/alter_missing_tablespace.test index d877b8f3b5d..4ab291a69f8 100644 --- a/mysql-test/suite/innodb/t/alter_missing_tablespace.test +++ b/mysql-test/suite/innodb/t/alter_missing_tablespace.test @@ -21,8 +21,10 @@ call mtr.add_suppression("Table .* in the InnoDB data dictionary has tablespace let $MYSQLD_DATADIR=`select @@datadir`; SET GLOBAL innodb_file_per_table=1; -CREATE TABLE t(a INT)ENGINE=InnoDB; +CREATE TABLE t(a SERIAL)ENGINE=InnoDB; CREATE TABLE `x..d` (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t1(a SERIAL)ENGINE=InnoDB; +INSERT INTO t1 VALUES(1),(2),(3); --source include/shutdown_mysqld.inc @@ -41,10 +43,15 @@ SELECT * FROM t; ALTER TABLE t ADD INDEX (a), ALGORITHM=INPLACE; SHOW WARNINGS; ---error ER_NO_SUCH_TABLE -ALTER TABLE t1 ADD INDEX (a), ALGORITHM=COPY; +--error ER_NO_SUCH_TABLE_IN_ENGINE +ALTER TABLE t ADD INDEX (a), ALGORITHM=COPY; SHOW WARNINGS; +--error ER_NO_SUCH_TABLE_IN_ENGINE +ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=INPLACE; +--error ER_NO_SUCH_TABLE_IN_ENGINE +ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=COPY; + --error ER_PARSE_ERROR ALTER TABLE t ALGORITHM=INPLACE, DISCARD TABLESPACE; --error ER_PARSE_ERROR @@ -56,3 +63,12 @@ DROP TABLE t; --error ER_NO_SUCH_TABLE_IN_ENGINE SELECT * FROM `x..d`; DROP 
TABLE `x..d`; + +ALTER TABLE t1 DISCARD TABLESPACE; +--error ER_TABLESPACE_DISCARDED +ALTER TABLE t1 AUTO_INCREMENT=1, ALGORITHM=INPLACE; +--error ER_TABLESPACE_DISCARDED +ALTER TABLE t1 AUTO_INCREMENT=1, FORCE, ALGORITHM=INPLACE; +--error ER_TABLESPACE_DISCARDED +ALTER TABLE t1 AUTO_INCREMENT=1, ALGORITHM=COPY; +DROP TABLE t1; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 1004a574cd1..c483e2dea59 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -4518,8 +4518,9 @@ prepare_inplace_alter_table_dict( uint32_t key_id = FIL_DEFAULT_ENCRYPTION_KEY; fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT; - if (fil_space_t* space - = fil_space_acquire(ctx->prebuilt->table->space)) { + if (dict_table_is_discarded(ctx->prebuilt->table)) { + } else if (fil_space_t* space + = fil_space_acquire(ctx->prebuilt->table->space)) { if (const fil_space_crypt_t* crypt_data = space->crypt_data) { key_id = crypt_data->key_id; @@ -4917,7 +4918,8 @@ new_clustered_failed: /* Initialize the AUTO_INCREMENT sequence to the rebuilt table from the old one. 
*/ - if (!old_table->found_next_number_field) { + if (!old_table->found_next_number_field + || dict_table_is_discarded(user_table)) { } else if (ib_uint64_t autoinc = btr_read_autoinc(clust_index)) { btr_write_autoinc(new_clust_index, autoinc); @@ -7372,9 +7374,10 @@ innobase_rename_or_enlarge_columns_cache( @param ha_alter_info Data used during in-place alter @param ctx In-place ALTER TABLE context @param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation */ +@param old_table MySQL table as it is before the ALTER operation +@return whether the operation failed (and my_error() was called) */ static MY_ATTRIBUTE((nonnull)) -void +bool commit_set_autoinc( Alter_inplace_info* ha_alter_info, ha_innobase_inplace_ctx*ctx, @@ -7402,6 +7405,13 @@ commit_set_autoinc( & Alter_inplace_info::CHANGE_CREATE_OPTION) && (ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { + + if (dict_table_is_discarded(ctx->old_table)) { + my_error(ER_TABLESPACE_DISCARDED, MYF(0), + old_table->s->table_name.str); + DBUG_RETURN(true); + } + /* An AUTO_INCREMENT value was supplied by the user. It must be persisted to the data file. */ const Field* ai = old_table->found_next_number_field; @@ -7481,7 +7491,7 @@ commit_set_autoinc( between prepare_inplace and commit_inplace. 
*/ } - DBUG_VOID_RETURN; + DBUG_RETURN(false); } /** Add or drop foreign key constraints to the data dictionary tables, @@ -8604,9 +8614,11 @@ ha_innobase::commit_inplace_alter_table( DBUG_ASSERT(new_clustered == ctx->need_rebuild()); - commit_set_autoinc(ha_alter_info, ctx, altered_table, table); + fail = commit_set_autoinc(ha_alter_info, ctx, altered_table, + table); - if (ctx->need_rebuild()) { + if (fail) { + } else if (ctx->need_rebuild()) { ctx->tmp_name = dict_mem_create_temporary_tablename( ctx->heap, ctx->new_table->name.m_name, ctx->new_table->id); From e26b07dbc6d85733eaf655895648ec08163debb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 18:17:26 +0300 Subject: [PATCH 44/54] Add a test case for a MySQL 5.7 bug that did not affect MariaDB --- mysql-test/suite/innodb/r/foreign_key.result | 11 +++++++++++ mysql-test/suite/innodb/t/foreign_key.test | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/mysql-test/suite/innodb/r/foreign_key.result b/mysql-test/suite/innodb/r/foreign_key.result index e569fc7dba7..5838c3a1fd5 100644 --- a/mysql-test/suite/innodb/r/foreign_key.result +++ b/mysql-test/suite/innodb/r/foreign_key.result @@ -306,3 +306,14 @@ id member_id SELECT * FROM payment_method; id member_id cardholder_address_id DROP TABLE payment_method,address,member; +# +# Bug #26958695 INNODB NESTED STORED FIELD WITH CONSTRAINT KEY +# PRODUCE BROKEN TABLE (no bug in MariaDB) +# +create table t1(f1 int,f2 int, primary key(f1), key(f2, f1))engine=innodb; +create table t2(f1 int, f2 int as (2) stored, f3 int as (f2) stored, +foreign key(f1) references t1(f2) on update set NULL) +engine=innodb; +insert into t1 values(1, 1); +insert into t2(f1) values(1); +drop table t2, t1; diff --git a/mysql-test/suite/innodb/t/foreign_key.test b/mysql-test/suite/innodb/t/foreign_key.test index 862717647b5..b586f3e9406 100644 --- a/mysql-test/suite/innodb/t/foreign_key.test +++ b/mysql-test/suite/innodb/t/foreign_key.test 
@@ -276,4 +276,16 @@ SELECT * FROM payment_method; DROP TABLE payment_method,address,member; +--echo # +--echo # Bug #26958695 INNODB NESTED STORED FIELD WITH CONSTRAINT KEY +--echo # PRODUCE BROKEN TABLE (no bug in MariaDB) +--echo # +create table t1(f1 int,f2 int, primary key(f1), key(f2, f1))engine=innodb; +create table t2(f1 int, f2 int as (2) stored, f3 int as (f2) stored, + foreign key(f1) references t1(f2) on update set NULL) +engine=innodb; +insert into t1 values(1, 1); +insert into t2(f1) values(1); +drop table t2, t1; + --source include/wait_until_count_sessions.inc From 3d10966b7d5ed408f9c085f5ba9ed9b12a7317aa Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Tue, 5 Dec 2017 19:13:12 +0530 Subject: [PATCH 45/54] Bug #26731689 FK ON TABLE WITH GENERATED COLS: ASSERTION POS < N_DEF Problem: During ALTER, when filling stored column info, a wrong column number is used. This is because we ignored virtual columns when iterating over the columns in the table, which leads to a debug assertion. Fix: In the InnoDB table cache object, vcols are stored in one list, stored and normal columns are stored in another list. When looking for a stored column, ignore the virtual columns to get the right column number of the stored column.
Reviewed by: Thiru , Satya RB: 17939 --- mysql-test/suite/innodb/r/stored_fk.result | 66 +++++++++++++ mysql-test/suite/innodb/t/stored_fk.test | 105 +++++++++++++++++++++ storage/innobase/handler/handler0alter.cc | 10 +- 3 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 mysql-test/suite/innodb/r/stored_fk.result create mode 100644 mysql-test/suite/innodb/t/stored_fk.test diff --git a/mysql-test/suite/innodb/r/stored_fk.result b/mysql-test/suite/innodb/r/stored_fk.result new file mode 100644 index 00000000000..6fb1684497f --- /dev/null +++ b/mysql-test/suite/innodb/r/stored_fk.result @@ -0,0 +1,66 @@ +# Create statement with FK on base column of stored column +create table t1(f1 int, f2 int as(f1) stored, +foreign key(f1) references t2(f1) on delete cascade)engine=innodb; +ERROR HY000: Cannot add foreign key constraint +# adding new stored column during alter table copy operation. +create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) virtual, +foreign key(f1) references t1(f1) on update cascade)engine=innodb; +alter table t2 add column f3 int as (f1) stored, add column f4 int as (f1) virtual; +ERROR HY000: Error on rename of 'OLD_FILE_NAME' to 'NEW_FILE_NAME' (errno: 150 - Foreign key constraint is incorrectly formed) +drop table t2, t1; +# adding foreign key constraint for base columns during alter copy. +create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) stored); +alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=copy; +ERROR HY000: Cannot add foreign key constraint +drop table t2, t1; +# adding foreign key constraint for base columns during online alter. +create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) stored); +set foreign_key_checks = 0; +alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=inplace; +ERROR HY000: Cannot add foreign key on the base column of stored column. 
+drop table t2, t1; +# adding stored column via online alter. +create table t1(f1 int primary key); +create table t2(f1 int not null, +foreign key(f1) references t1(f1) on update cascade)engine=innodb; +alter table t2 add column f2 int as (f1) stored, algorithm=inplace; +ERROR 0A000: ALGORITHM=INPLACE is not supported for this operation. Try ALGORITHM=COPY. +drop table t2, t1; +set foreign_key_checks = 1; +# +# BUG#26731689 FK ON TABLE WITH GENERATED COLS: ASSERTION POS < N_DEF +# +SET @foreign_key_checks_saved = @@foreign_key_checks; +SET foreign_key_checks=0; +DROP TABLE IF EXISTS s,t; +CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED, c INT, +d INT GENERATED ALWAYS AS (0) VIRTUAL, e INT) ENGINE=innodb; +CREATE TABLE t (a INT) ENGINE=innodb; +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (e) REFERENCES t(a) ON UPDATE SET null; +ERROR HY000: Failed to add the foreign key constaint. Missing index for constraint 'c' in the referenced table 't' +ALTER TABLE t ADD PRIMARY KEY(a); +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (e) REFERENCES t(a) ON UPDATE SET null; +DROP TABLE s,t; +CREATE TABLE s (a INT GENERATED ALWAYS AS (0) VIRTUAL, +b INT GENERATED ALWAYS AS (0) STORED, c INT) ENGINE=innodb; +CREATE TABLE t (a INT) ENGINE=innodb; +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (c) REFERENCES t(a) ON UPDATE SET null; +ERROR HY000: Failed to add the foreign key constaint. 
Missing index for constraint 'c' in the referenced table 't' +ALTER TABLE t ADD PRIMARY KEY(a); +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (c) REFERENCES t(a) ON UPDATE SET null; +DROP TABLE s,t; +CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED) ENGINE=innodb; +CREATE TABLE t (a INT PRIMARY KEY) ENGINE=innodb; +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; +DROP TABLE s,t; +CREATE TABLE s (a INT, b INT) ENGINE=innodb; +CREATE TABLE t (a INT) ENGINE=innodb; +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; +ERROR HY000: Failed to add the foreign key constaint. Missing index for constraint 'c' in the referenced table 't' +ALTER TABLE t ADD PRIMARY KEY(a); +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; +DROP TABLE s,t; +SET @@foreign_key_checks = @foreign_key_checks_saved; diff --git a/mysql-test/suite/innodb/t/stored_fk.test b/mysql-test/suite/innodb/t/stored_fk.test new file mode 100644 index 00000000000..da8df775523 --- /dev/null +++ b/mysql-test/suite/innodb/t/stored_fk.test @@ -0,0 +1,105 @@ +--source include/have_innodb.inc + +--echo # Create statement with FK on base column of stored column +--error ER_CANNOT_ADD_FOREIGN +create table t1(f1 int, f2 int as(f1) stored, + foreign key(f1) references t2(f1) on delete cascade)engine=innodb; + +--echo # adding new stored column during alter table copy operation. +create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) virtual, + foreign key(f1) references t1(f1) on update cascade)engine=innodb; + +--replace_regex /Error on rename of '.*' to '.*'/Error on rename of 'OLD_FILE_NAME' to 'NEW_FILE_NAME'/ +--error ER_ERROR_ON_RENAME +alter table t2 add column f3 int as (f1) stored, add column f4 int as (f1) virtual; +drop table t2, t1; + +--echo # adding foreign key constraint for base columns during alter copy. 
+create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) stored); +--error ER_CANNOT_ADD_FOREIGN +alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=copy; +drop table t2, t1; + +--echo # adding foreign key constraint for base columns during online alter. +create table t1(f1 int primary key); +create table t2(f1 int not null, f2 int as (f1) stored); +set foreign_key_checks = 0; +--error ER_CANNOT_ADD_FOREIGN_BASE_COL_STORED +alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=inplace; +drop table t2, t1; + +--echo # adding stored column via online alter. +create table t1(f1 int primary key); +create table t2(f1 int not null, + foreign key(f1) references t1(f1) on update cascade)engine=innodb; +--error ER_ALTER_OPERATION_NOT_SUPPORTED +alter table t2 add column f2 int as (f1) stored, algorithm=inplace; +drop table t2, t1; +set foreign_key_checks = 1; + +--echo # +--echo # BUG#26731689 FK ON TABLE WITH GENERATED COLS: ASSERTION POS < N_DEF +--echo # + +SET @foreign_key_checks_saved = @@foreign_key_checks; +SET foreign_key_checks=0; + +--disable_warnings +DROP TABLE IF EXISTS s,t; +--enable_warnings + +CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED, c INT, + d INT GENERATED ALWAYS AS (0) VIRTUAL, e INT) ENGINE=innodb; + +CREATE TABLE t (a INT) ENGINE=innodb; + +# This would fail. No corresponding index +--error ER_FK_NO_INDEX_PARENT +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (e) REFERENCES t(a) ON UPDATE SET null; + +ALTER TABLE t ADD PRIMARY KEY(a); + +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (e) REFERENCES t(a) ON UPDATE SET null; + +DROP TABLE s,t; + +CREATE TABLE s (a INT GENERATED ALWAYS AS (0) VIRTUAL, + b INT GENERATED ALWAYS AS (0) STORED, c INT) ENGINE=innodb; + +CREATE TABLE t (a INT) ENGINE=innodb; + +# This would fail. 
No corresponding index +--error ER_FK_NO_INDEX_PARENT +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (c) REFERENCES t(a) ON UPDATE SET null; + +ALTER TABLE t ADD PRIMARY KEY(a); + +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (c) REFERENCES t(a) ON UPDATE SET null; + +DROP TABLE s,t; + +CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED) ENGINE=innodb; + +CREATE TABLE t (a INT PRIMARY KEY) ENGINE=innodb; + +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; + +DROP TABLE s,t; + +CREATE TABLE s (a INT, b INT) ENGINE=innodb; + +CREATE TABLE t (a INT) ENGINE=innodb; + +# This would fail. No corresponding index +--error ER_FK_NO_INDEX_PARENT +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; + +ALTER TABLE t ADD PRIMARY KEY(a); + +ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; + +DROP TABLE s,t; + +SET @@foreign_key_checks = @foreign_key_checks_saved; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index c483e2dea59..2ae4f65efc5 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -5434,18 +5434,24 @@ alter_fill_stored_column( dict_s_col_list** s_cols, mem_heap_t** s_heap) { - ulint n_cols = altered_table->s->fields; + ulint n_cols = altered_table->s->fields; + ulint stored_col_no = 0; for (ulint i = 0; i < n_cols; i++) { Field* field = altered_table->field[i]; dict_s_col_t s_col; + if (!innobase_is_v_fld(field)) { + stored_col_no++; + } + if (!innobase_is_s_fld(field)) { continue; } ulint num_base = 0; - dict_col_t* col = dict_table_get_nth_col(table, i); + dict_col_t* col = dict_table_get_nth_col(table, + stored_col_no); s_col.m_col = col; s_col.s_pos = i; From 671a37f60e6663a175368b134574c0274964589b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 15:08:08 +0300 Subject: [PATCH 46/54] Adjust the test case for MariaDB 
Note: it does not appear to cover the bug fix! The test will pass even if the Oracle Bug #26731689 fix is reverted. --- mysql-test/suite/innodb/r/stored_fk.result | 46 +++++++++++++--------- mysql-test/suite/innodb/t/stored_fk.test | 37 ++++++----------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/mysql-test/suite/innodb/r/stored_fk.result b/mysql-test/suite/innodb/r/stored_fk.result index 6fb1684497f..35524d5a88f 100644 --- a/mysql-test/suite/innodb/r/stored_fk.result +++ b/mysql-test/suite/innodb/r/stored_fk.result @@ -1,41 +1,50 @@ # Create statement with FK on base column of stored column create table t1(f1 int, f2 int as(f1) stored, foreign key(f1) references t2(f1) on delete cascade)engine=innodb; -ERROR HY000: Cannot add foreign key constraint +ERROR HY000: Can't create table `test`.`t1` (errno: 150 "Foreign key constraint is incorrectly formed") # adding new stored column during alter table copy operation. -create table t1(f1 int primary key); +create table t1(f1 int primary key) engine=innodb; create table t2(f1 int not null, f2 int as (f1) virtual, foreign key(f1) references t1(f1) on update cascade)engine=innodb; alter table t2 add column f3 int as (f1) stored, add column f4 int as (f1) virtual; -ERROR HY000: Error on rename of 'OLD_FILE_NAME' to 'NEW_FILE_NAME' (errno: 150 - Foreign key constraint is incorrectly formed) -drop table t2, t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `f1` int(11) NOT NULL, + `f2` int(11) GENERATED ALWAYS AS (`f1`) VIRTUAL, + `f3` int(11) GENERATED ALWAYS AS (`f1`) STORED, + `f4` int(11) GENERATED ALWAYS AS (`f1`) VIRTUAL, + KEY `f1` (`f1`), + CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`f1`) REFERENCES `t1` (`f1`) ON UPDATE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t2; # adding foreign key constraint for base columns during alter copy. 
-create table t1(f1 int primary key); -create table t2(f1 int not null, f2 int as (f1) stored); +create table t2(f1 int not null, f2 int as (f1) stored) engine=innodb; alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=copy; -ERROR HY000: Cannot add foreign key constraint -drop table t2, t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `f1` int(11) NOT NULL, + `f2` int(11) GENERATED ALWAYS AS (`f1`) STORED, + KEY `f1` (`f1`), + CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`f1`) REFERENCES `t1` (`f1`) ON UPDATE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +drop table t2; # adding foreign key constraint for base columns during online alter. -create table t1(f1 int primary key); -create table t2(f1 int not null, f2 int as (f1) stored); +create table t2(f1 int not null, f2 int as (f1) stored) engine=innodb; set foreign_key_checks = 0; alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=inplace; -ERROR HY000: Cannot add foreign key on the base column of stored column. -drop table t2, t1; +ERROR 0A000: Cannot add foreign key on the base column of stored column +drop table t2; # adding stored column via online alter. -create table t1(f1 int primary key); create table t2(f1 int not null, foreign key(f1) references t1(f1) on update cascade)engine=innodb; alter table t2 add column f2 int as (f1) stored, algorithm=inplace; -ERROR 0A000: ALGORITHM=INPLACE is not supported for this operation. Try ALGORITHM=COPY. +ERROR 0A000: ALGORITHM=INPLACE is not supported for this operation. 
Try ALGORITHM=COPY drop table t2, t1; -set foreign_key_checks = 1; # # BUG#26731689 FK ON TABLE WITH GENERATED COLS: ASSERTION POS < N_DEF # -SET @foreign_key_checks_saved = @@foreign_key_checks; -SET foreign_key_checks=0; -DROP TABLE IF EXISTS s,t; CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED, c INT, d INT GENERATED ALWAYS AS (0) VIRTUAL, e INT) ENGINE=innodb; CREATE TABLE t (a INT) ENGINE=innodb; @@ -63,4 +72,3 @@ ERROR HY000: Failed to add the foreign key constaint. Missing index for constrai ALTER TABLE t ADD PRIMARY KEY(a); ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; DROP TABLE s,t; -SET @@foreign_key_checks = @foreign_key_checks_saved; diff --git a/mysql-test/suite/innodb/t/stored_fk.test b/mysql-test/suite/innodb/t/stored_fk.test index da8df775523..b9c7c934555 100644 --- a/mysql-test/suite/innodb/t/stored_fk.test +++ b/mysql-test/suite/innodb/t/stored_fk.test @@ -1,55 +1,46 @@ --source include/have_innodb.inc --echo # Create statement with FK on base column of stored column ---error ER_CANNOT_ADD_FOREIGN +--error ER_CANT_CREATE_TABLE create table t1(f1 int, f2 int as(f1) stored, foreign key(f1) references t2(f1) on delete cascade)engine=innodb; --echo # adding new stored column during alter table copy operation. -create table t1(f1 int primary key); +create table t1(f1 int primary key) engine=innodb; create table t2(f1 int not null, f2 int as (f1) virtual, foreign key(f1) references t1(f1) on update cascade)engine=innodb; ---replace_regex /Error on rename of '.*' to '.*'/Error on rename of 'OLD_FILE_NAME' to 'NEW_FILE_NAME'/ ---error ER_ERROR_ON_RENAME +# MySQL 5.7 would refuse this +#--error ER_ERROR_ON_RENAME alter table t2 add column f3 int as (f1) stored, add column f4 int as (f1) virtual; -drop table t2, t1; +show create table t2; +drop table t2; --echo # adding foreign key constraint for base columns during alter copy. 
-create table t1(f1 int primary key); -create table t2(f1 int not null, f2 int as (f1) stored); ---error ER_CANNOT_ADD_FOREIGN +create table t2(f1 int not null, f2 int as (f1) stored) engine=innodb; +# MySQL 5.7 would refuse this alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=copy; -drop table t2, t1; +show create table t2; +drop table t2; --echo # adding foreign key constraint for base columns during online alter. -create table t1(f1 int primary key); -create table t2(f1 int not null, f2 int as (f1) stored); +create table t2(f1 int not null, f2 int as (f1) stored) engine=innodb; set foreign_key_checks = 0; ---error ER_CANNOT_ADD_FOREIGN_BASE_COL_STORED +--error 138 alter table t2 add foreign key(f1) references t1(f1) on update cascade, algorithm=inplace; -drop table t2, t1; +drop table t2; --echo # adding stored column via online alter. -create table t1(f1 int primary key); create table t2(f1 int not null, foreign key(f1) references t1(f1) on update cascade)engine=innodb; --error ER_ALTER_OPERATION_NOT_SUPPORTED alter table t2 add column f2 int as (f1) stored, algorithm=inplace; drop table t2, t1; -set foreign_key_checks = 1; --echo # --echo # BUG#26731689 FK ON TABLE WITH GENERATED COLS: ASSERTION POS < N_DEF --echo # -SET @foreign_key_checks_saved = @@foreign_key_checks; -SET foreign_key_checks=0; - ---disable_warnings -DROP TABLE IF EXISTS s,t; ---enable_warnings - CREATE TABLE s (a INT, b INT GENERATED ALWAYS AS (0) STORED, c INT, d INT GENERATED ALWAYS AS (0) VIRTUAL, e INT) ENGINE=innodb; @@ -101,5 +92,3 @@ ALTER TABLE t ADD PRIMARY KEY(a); ALTER TABLE s ADD CONSTRAINT c FOREIGN KEY (a) REFERENCES t(a) ON UPDATE SET null; DROP TABLE s,t; - -SET @@foreign_key_checks = @foreign_key_checks_saved; From b7e333f98a711931cbcbb444302fcbddcefb6f10 Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Mon, 18 Dec 2017 18:40:08 +0530 Subject: [PATCH 47/54] Bug #26805833 INNODB COMPLAINS OF SYNTAX ERROR, BUT DOES NOT SAY WHICH OPTION 
Problem: when incorrect value is assigned to innodb_data_file_path or innodb_temp_data_file_path parameter, Innodb returns error and logs error message in mysqlds.err file but there is no information in error message about the parameter which causes Innodb initialization is failed. Fix: Added error message with parameter name and value, which causes Innodb initialization is failed. Reviewed by: Jimmy RB: 18206 --- mysql-test/suite/innodb/t/temporary_table.test | 1 + storage/innobase/handler/ha_innodb.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/mysql-test/suite/innodb/t/temporary_table.test b/mysql-test/suite/innodb/t/temporary_table.test index 9a640657c4d..db87367dbb9 100644 --- a/mysql-test/suite/innodb/t/temporary_table.test +++ b/mysql-test/suite/innodb/t/temporary_table.test @@ -21,6 +21,7 @@ call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("innodb_temporary and innodb_system file names seem to be the same"); call mtr.add_suppression("Could not create the shared innodb_temporary"); call mtr.add_suppression("InnoDB: syntax error in file path"); +call mtr.add_suppression("InnoDB: Unable to parse innodb_temp_data_file_path="); --enable_query_log let $MYSQL_TMP_DIR = `select @@tmpdir`; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 51a8e3abaf0..b74c886c692 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3932,6 +3932,8 @@ innobase_init( /* Supports raw devices */ if (!srv_sys_space.parse_params(innobase_data_file_path, true)) { + ib::error() << "Unable to parse innodb_data_file_path=" + << innobase_data_file_path; DBUG_RETURN(innobase_init_abort()); } @@ -3950,6 +3952,8 @@ innobase_init( srv_tmp_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) { + ib::error() << "Unable to parse innodb_temp_data_file_path=" + << innobase_temp_data_file_path; 
DBUG_RETURN(innobase_init_abort()); } From 280879ebd97b286a1ac72e79148badddce9e6e29 Mon Sep 17 00:00:00 2001 From: Aditya A Date: Mon, 29 Jan 2018 17:17:21 +0530 Subject: [PATCH 48/54] Bug #27304661 MYSQL CRASH DOING SYNC INDEX ] [FATAL] INNODB: SEMAPHORE WAIT HAS LASTED > 600 PROBLEM ------- Whenever an fts table is created it registers itself in a queue which is operated by a background thread whose job is to optimize the fts tables in background. Additionally we place these fts tables in non-LRU list so that they cannot be evicted from cache. But in the scenario when a node is brought up which is already having fts tables ,we first try to load the fts tables in dictionary ,but we skip the part where it is added in background queue and in non-LRU list because the background thread is not yet created,so these tables are loaded but they can be evicted from the cache. Now coming to the deadlock scenario 1. A Server background thread is trying to evict a table from the cache because the cache is full,so it scans the LRU list for the tables it can evict.It finds the fts table (because of the reason explained above) can be evicted and it takes the dict_sys->mutex (this is a system wide mutex) submits a request to the background thread to remove this table from queue and waits it to be completed. 2. In the mean time fts_optimize_thread() is processing another job in the queue and needs dict_sys->mutex for a small amount of time, but it cannot get it because it is blocked by the first background thread. So Thread 1 is waiting for its job to be completed by Thread 2,whereas Thread 2 is waiting for dict_sys->mutex held by thread 1 ,causing the deadlock. 
FIX --- .../innodb_fts/r/fulltext_table_evict.result | 19 +++++ .../innodb_fts/t/fulltext_table_evict.test | 47 +++++++++++ storage/innobase/dict/dict0dict.cc | 9 ++- storage/innobase/fts/fts0opt.cc | 78 ++++++++++++++----- 4 files changed, 132 insertions(+), 21 deletions(-) create mode 100644 mysql-test/suite/innodb_fts/r/fulltext_table_evict.result create mode 100644 mysql-test/suite/innodb_fts/t/fulltext_table_evict.test diff --git a/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result b/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result new file mode 100644 index 00000000000..410bf2a34e1 --- /dev/null +++ b/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result @@ -0,0 +1,19 @@ +# +# Bug Bug #27304661 MYSQL CRASH DOING SYNC INDEX ] +# [FATAL] INNODB: SEMAPHORE WAIT HAS LASTED > 600 +# +CREATE TABLE t1 ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +f1 TEXT(500), +FULLTEXT idx (f1) +) ENGINE=InnoDB; +insert into t1 (f1) values ('fjdhfsjhf'),('dhjfhjshfj'),('dhjafjhfj'); +# restart +set @save_table_definition_cache=@@global.table_definition_cache; +set @save_table_open_cache=@@global.table_open_cache; +set global table_definition_cache=400; +set global table_open_cache= 1024; +SET GLOBAL DEBUG="+d,crash_if_fts_table_is_evicted"; +set @@global.table_definition_cache=@save_table_definition_cache; +set @@global.table_open_cache=@save_table_open_cache; +drop table t1; diff --git a/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test b/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test new file mode 100644 index 00000000000..245f00bc336 --- /dev/null +++ b/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test @@ -0,0 +1,47 @@ +--echo # +--echo # Bug Bug #27304661 MYSQL CRASH DOING SYNC INDEX ] +--echo # [FATAL] INNODB: SEMAPHORE WAIT HAS LASTED > 600 +--echo # + +--source include/have_innodb.inc +--source include/have_debug.inc + +CREATE TABLE t1 ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + f1 TEXT(500), + FULLTEXT idx 
(f1) + ) ENGINE=InnoDB; +insert into t1 (f1) values ('fjdhfsjhf'),('dhjfhjshfj'),('dhjafjhfj'); + +--source include/restart_mysqld.inc + +set @save_table_definition_cache=@@global.table_definition_cache; +set @save_table_open_cache=@@global.table_open_cache; + +set global table_definition_cache=400; +set global table_open_cache= 1024; + +SET GLOBAL DEBUG="+d,crash_if_fts_table_is_evicted"; +#Create 1000 tables, try the best to evict t1 . + +--disable_query_log +let $loop=1000; +while($loop) +{ + eval create table t_$loop(id int, name text(100), fulltext idxt_$loop(name) )engine=innodb; + dec $loop; +} + +let $loop=1000; +while($loop) +{ + eval drop table t_$loop; + dec $loop; +} + +SET GLOBAL DEBUG="-d,crash_if_fts_table_is_evicted"; +--enable_query_log +set @@global.table_definition_cache=@save_table_definition_cache; +set @@global.table_open_cache=@save_table_open_cache; +drop table t1; + diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index ac5357ded3a..ff8b3d75010 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2018, MariaDB Corporation. 
@@ -1453,6 +1453,13 @@ dict_make_room_in_cache( if (dict_table_can_be_evicted(table)) { + DBUG_EXECUTE_IF("crash_if_fts_table_is_evicted", + { + if (table->fts && + dict_table_has_fts_index(table)) { + ut_ad(0); + } + };); dict_table_remove_from_cache_low(table, TRUE); ++n_evicted; diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index 910721d8b32..cb1df7082f7 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -41,6 +41,9 @@ Completed 2011/7/10 Sunny and Jimmy Yang /** The FTS optimize thread's work queue. */ static ib_wqueue_t* fts_optimize_wq; +/** The FTS vector to store fts_slot_t */ +static ib_vector_t* fts_slots; + /** Time to wait for a message. 
*/ static const ulint FTS_QUEUE_WAIT_IN_USECS = 5000000; @@ -2976,9 +2979,6 @@ fts_optimize_thread( /*================*/ void* arg) /*!< in: work queue*/ { - mem_heap_t* heap; - ib_vector_t* tables; - ib_alloc_t* heap_alloc; ulint current = 0; ibool done = FALSE; ulint n_tables = 0; @@ -2988,10 +2988,10 @@ fts_optimize_thread( ut_ad(!srv_read_only_mode); my_thread_init(); - heap = mem_heap_create(sizeof(dict_table_t*) * 64); - heap_alloc = ib_heap_allocator_create(heap); + ut_ad(fts_slots); - tables = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4); + /* Assign number of tables added in fts_slots_t to n_tables */ + n_tables = ib_vector_size(fts_slots); while (!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) { @@ -3005,10 +3005,10 @@ fts_optimize_thread( fts_slot_t* slot; - ut_a(ib_vector_size(tables) > 0); + ut_a(ib_vector_size(fts_slots) > 0); slot = static_cast( - ib_vector_get(tables, current)); + ib_vector_get(fts_slots, current)); /* Handle the case of empty slots. */ if (slot->state != FTS_STATE_EMPTY) { @@ -3021,8 +3021,8 @@ fts_optimize_thread( ++current; /* Wrap around the counter. */ - if (current >= ib_vector_size(tables)) { - n_optimize = fts_optimize_how_many(tables); + if (current >= ib_vector_size(fts_slots)) { + n_optimize = fts_optimize_how_many(fts_slots); current = 0; } @@ -3036,7 +3036,7 @@ fts_optimize_thread( /* Timeout ? 
*/ if (msg == NULL) { - if (fts_is_sync_needed(tables)) { + if (fts_is_sync_needed(fts_slots)) { fts_need_sync = true; } @@ -3057,7 +3057,7 @@ fts_optimize_thread( case FTS_MSG_ADD_TABLE: ut_a(!done); if (fts_optimize_new_table( - tables, + fts_slots, static_cast( msg->ptr))) { ++n_tables; @@ -3067,7 +3067,7 @@ fts_optimize_thread( case FTS_MSG_OPTIMIZE_TABLE: if (!done) { fts_optimize_start_table( - tables, + fts_slots, static_cast( msg->ptr)); } @@ -3075,7 +3075,7 @@ fts_optimize_thread( case FTS_MSG_DEL_TABLE: if (fts_optimize_del_table( - tables, static_cast( + fts_slots, static_cast( msg->ptr))) { --n_tables; } @@ -3098,7 +3098,7 @@ fts_optimize_thread( mem_heap_free(msg->heap); if (!done) { - n_optimize = fts_optimize_how_many(tables); + n_optimize = fts_optimize_how_many(fts_slots); } else { n_optimize = 0; } @@ -3110,11 +3110,11 @@ fts_optimize_thread( if (n_tables > 0) { ulint i; - for (i = 0; i < ib_vector_size(tables); i++) { + for (i = 0; i < ib_vector_size(fts_slots); i++) { fts_slot_t* slot; slot = static_cast( - ib_vector_get(tables, i)); + ib_vector_get(fts_slots, i)); if (slot->state != FTS_STATE_EMPTY) { fts_optimize_sync_table(slot->table_id); @@ -3122,7 +3122,7 @@ fts_optimize_thread( } } - ib_vector_free(tables); + ib_vector_free(fts_slots); ib::info() << "FTS optimize thread exiting."; @@ -3142,14 +3142,52 @@ void fts_optimize_init(void) /*===================*/ { + mem_heap_t* heap; + ib_alloc_t* heap_alloc; + dict_table_t* table; + ut_ad(!srv_read_only_mode); /* For now we only support one optimize thread. 
*/ ut_a(fts_optimize_wq == NULL); + /* Create FTS optimize work queue */ fts_optimize_wq = ib_wqueue_create(); - fts_opt_shutdown_event = os_event_create(0); ut_a(fts_optimize_wq != NULL); + + /* Create FTS vector to store fts_slot_t */ + heap = mem_heap_create(sizeof(dict_table_t*) * 64); + heap_alloc = ib_heap_allocator_create(heap); + fts_slots = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4); + + /* Add fts tables to the fts_slots vector which were skipped during restart */ + std::vector table_vector; + std::vector::iterator it; + + mutex_enter(&dict_sys->mutex); + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + if (table->fts && + dict_table_has_fts_index(table)) { + if (fts_optimize_new_table(fts_slots, + table)){ + table_vector.push_back(table); + } + } + } + + /* It is better to call dict_table_prevent_eviction() + outside the above loop because it operates on + dict_sys->table_LRU list.*/ + for (it=table_vector.begin();it!=table_vector.end();++it) { + dict_table_prevent_eviction(*it); + } + + mutex_exit(&dict_sys->mutex); + table_vector.clear(); + + fts_opt_shutdown_event = os_event_create(0); last_check_sync_time = ut_time(); os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL); From c88ac735b131328657ef0ed402fca2a4a1e4f766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 May 2018 16:56:02 +0300 Subject: [PATCH 49/54] Adjust the test case for MariaDB --- .../suite/innodb_fts/r/fulltext_table_evict.result | 4 ++-- .../suite/innodb_fts/t/fulltext_table_evict.test | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result b/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result index 410bf2a34e1..d9d329aa6c0 100644 --- a/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result +++ b/mysql-test/suite/innodb_fts/r/fulltext_table_evict.result @@ -8,12 +8,12 @@ f1 
TEXT(500), FULLTEXT idx (f1) ) ENGINE=InnoDB; insert into t1 (f1) values ('fjdhfsjhf'),('dhjfhjshfj'),('dhjafjhfj'); -# restart set @save_table_definition_cache=@@global.table_definition_cache; set @save_table_open_cache=@@global.table_open_cache; set global table_definition_cache=400; set global table_open_cache= 1024; -SET GLOBAL DEBUG="+d,crash_if_fts_table_is_evicted"; +SET @save_dbug = @@GLOBAL.debug_dbug; +SET GLOBAL DEBUG_DBUG="+d,crash_if_fts_table_is_evicted"; set @@global.table_definition_cache=@save_table_definition_cache; set @@global.table_open_cache=@save_table_open_cache; drop table t1; diff --git a/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test b/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test index 245f00bc336..2e7aa655aa1 100644 --- a/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test +++ b/mysql-test/suite/innodb_fts/t/fulltext_table_evict.test @@ -1,11 +1,12 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc + --echo # --echo # Bug Bug #27304661 MYSQL CRASH DOING SYNC INDEX ] --echo # [FATAL] INNODB: SEMAPHORE WAIT HAS LASTED > 600 --echo # ---source include/have_innodb.inc ---source include/have_debug.inc - CREATE TABLE t1 ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, f1 TEXT(500), @@ -21,7 +22,8 @@ set @save_table_open_cache=@@global.table_open_cache; set global table_definition_cache=400; set global table_open_cache= 1024; -SET GLOBAL DEBUG="+d,crash_if_fts_table_is_evicted"; +SET @save_dbug = @@GLOBAL.debug_dbug; +SET GLOBAL DEBUG_DBUG="+d,crash_if_fts_table_is_evicted"; #Create 1000 tables, try the best to evict t1 . 
--disable_query_log @@ -39,9 +41,8 @@ while($loop) dec $loop; } -SET GLOBAL DEBUG="-d,crash_if_fts_table_is_evicted"; +SET GLOBAL DEBUG_DBUG = @save_dbug; --enable_query_log set @@global.table_definition_cache=@save_table_definition_cache; set @@global.table_open_cache=@save_table_open_cache; drop table t1; - From 279f992b85bf024d4d6031a79bf41741da73c949 Mon Sep 17 00:00:00 2001 From: Aakanksha Verma Date: Mon, 29 Jan 2018 18:47:23 +0530 Subject: [PATCH 50/54] Bug #27141613 ASSERTION: TRX0REC.CC:319:COL->IS_VIRTUAL() / CRASH IN TRX_UNDO_READ_V_COLS PROBLEM ======= When add of virtual index fails with DB_TOO_BIG_RECORD , the virtual index being freed isn't removed from the list of indexes a virtual column(which is part of the index). This while the undo log is read could fetch a wrong value during rollback and cause the assertion reported in the bug particularly. FIX === Added a function that is called when the virtual index being freed would allow the index be removed from the index list of virtual column which was a field of that index. Reviwed By: Jimmy Yang RB: 18528 --- storage/innobase/dict/dict0dict.cc | 35 ++++++++++++++++++++++++++++ storage/innobase/dict/dict0mem.cc | 3 ++- storage/innobase/include/dict0dict.h | 8 ++++++- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index ff8b3d75010..42378d5520d 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2428,6 +2428,41 @@ dict_index_add_to_cache( table, index, NULL, page_no, strict)); } +/** Clears the virtual column's index list before index is +being freed. 
+@param[in] index Index being freed */ +void +dict_index_remove_from_v_col_list(dict_index_t* index) { + + if (dict_index_has_virtual(index)) { + const dict_col_t* col; + const dict_v_col_t* vcol; + + for (ulint i = 0; i < dict_index_get_n_fields(index); i++) { + col = dict_index_get_nth_col(index, i); + if (dict_col_is_virtual(col)) { + vcol = reinterpret_cast( + col); + /* This could be NULL, when we do add + virtual column, add index together. We do not + need to track this virtual column's index */ + if (vcol->v_indexes == NULL) { + continue; + } + dict_v_idx_list::iterator it; + for (it = vcol->v_indexes->begin(); + it != vcol->v_indexes->end(); ++it) { + dict_v_idx_t v_index = *it; + if (v_index.index == index) { + vcol->v_indexes->erase(it); + break; + } + } + } + } + } +} + /** Adds an index to the dictionary cache, with possible indexing newly added column. @param[in,out] table table on which the index is diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 7ade7735048..dcf52c93de1 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2018, MariaDB Corporation. @@ -1034,6 +1034,7 @@ dict_mem_index_free( UT_DELETE(index->rtr_track->rtr_active); } + dict_index_remove_from_v_col_list(index); mem_heap_free(index->heap); } diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 0592b64b669..51ce248b98d 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. 
All Rights Reserved. +Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2018, MariaDB Corporation. @@ -1121,6 +1121,12 @@ dict_index_add_to_cache( ibool strict) MY_ATTRIBUTE((warn_unused_result)); +/** Clears the virtual column's index list before index is being freed. +@param[in] index Index being freed */ +void +dict_index_remove_from_v_col_list( + dict_index_t* index); + /** Adds an index to the dictionary cache, with possible indexing newly added column. @param[in] table table on which the index is From 4c7ea34ef691e1a2ed243959de016abb3df32611 Mon Sep 17 00:00:00 2001 From: Aakanksha Verma Date: Thu, 8 Feb 2018 01:25:10 +0530 Subject: [PATCH 51/54] FOLLOW-UP FIX FOR BUG#27141613 PROBLEM Issue found during ntest run is a regression of Bug #27141613. The issue is basically when index is being freed due to an error during its creation,when the index isn't added to dictionary cache its field columns are not set, the derefrencing of null col pointer during the clean of index from the virtual column's leads to a crash. NOTE: Also test i_innodb.virtual_debug was failing on 32k page size and above for the newly added scenario. Fixed that. FIX Added a check that if only the index is cached , the virtual index freeing from the virtual cols index list is performed. Reviewed by: Satya Bodapati RB: 18670 --- storage/innobase/dict/dict0dict.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 42378d5520d..4751add93d5 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2433,7 +2433,10 @@ being freed. 
@param[in] index Index being freed */ void dict_index_remove_from_v_col_list(dict_index_t* index) { - + /* Index is not completely formed */ + if (!index->cached) { + return; + } if (dict_index_has_virtual(index)) { const dict_col_t* col; const dict_v_col_t* vcol; From 0da98472b70d869c719e62a281a89da5d9700178 Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Wed, 14 Feb 2018 12:51:05 +0530 Subject: [PATCH 52/54] Bug #23593654 CRASH IN BUF_BLOCK_FROM_AHI WHEN LARGE PAGES AND AHI ARE ENABLED Problem: Fix for Bug #21348684 (#Rb9581) introduced a conditional debug execute 'buf_pool_resize_chunk_null', which causes new chunks memory for 2nd buffer pool instance is freed. Buffer pool resize function removes all old chunks entry from 'buf_chunk_map_reg' and add new chunks entry into it. But when 'buf_pool_resize_chunk_null' is set true, 2nd buffer pool instance's chunk entries are not added into 'buf_chunk_map_reg'. When purge thread tries to access that buffer chunk, it leads to debug assertion. Fix: Added old chunk entries into 'buf_chunk_map_reg' for 2nd buffer pool instance when 'buf_pool_resize_chunk_null' debug condition is set to true. Reviewed by: Jimmy RB: 18664 --- storage/innobase/buf/buf0buf.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 3aa99dfd7d2..2f385b6f8e6 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2018, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2013, 2018, MariaDB Corporation. 
@@ -2869,6 +2869,9 @@ withdraw_retry: = buf_pool->n_chunks; warning = true; buf_pool->chunks_old = NULL; + for (ulint j = 0; j < buf_pool->n_chunks_new; j++) { + buf_pool_register_chunk(&(buf_pool->chunks[j])); + } goto calc_buf_pool_size; } From 8c4f3b316e57e5a411247adba74bb333958ce573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Sat, 12 May 2018 10:27:19 +0300 Subject: [PATCH 53/54] After-merge fix --- .../galera/r/galera_ist_mysqldump.result | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/mysql-test/suite/galera/r/galera_ist_mysqldump.result b/mysql-test/suite/galera/r/galera_ist_mysqldump.result index f4a8e9d240f..58a3ca297f8 100644 --- a/mysql-test/suite/galera/r/galera_ist_mysqldump.result +++ b/mysql-test/suite/galera/r/galera_ist_mysqldump.result @@ -1,11 +1,16 @@ Setting SST method to mysqldump ... call mtr.add_suppression("WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to '127.0.0.1'"); call mtr.add_suppression("Failed to load slave replication state from table mysql.gtid_slave_pos"); +connection node_1; CREATE USER 'sst'; GRANT ALL PRIVILEGES ON *.* TO 'sst'; SET GLOBAL wsrep_sst_auth = 'sst:'; +connection node_2; SET GLOBAL wsrep_sst_method = 'mysqldump'; +connection node_1; +connection node_2; Performing State Transfer on a server that has been shut down cleanly and restarted +connection node_1; CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; @@ -15,6 +20,7 @@ INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; +connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node2_committed_before'); @@ -24,6 +30,7 @@ INSERT INTO t1 VALUES ('node2_committed_before'); INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Shutting down server ... 
+connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node1_committed_during'); @@ -38,6 +45,7 @@ INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +connect node_1a_galera_st_shutdown_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); @@ -45,6 +53,7 @@ INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +connection node_2; Starting server ... SET AUTOCOMMIT=OFF; START TRANSACTION; @@ -54,6 +63,7 @@ INSERT INTO t1 VALUES ('node2_committed_after'); INSERT INTO t1 VALUES ('node2_committed_after'); INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; +connection node_1; INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); @@ -68,6 +78,7 @@ INSERT INTO t1 VALUES ('node1_committed_after'); INSERT INTO t1 VALUES ('node1_committed_after'); INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; +connection node_1a_galera_st_shutdown_slave; INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); @@ -82,6 +93,7 @@ COUNT(*) = 0 1 COMMIT; SET AUTOCOMMIT=ON; +connection node_1; SELECT COUNT(*) = 35 FROM t1; COUNT(*) = 35 1 @@ -92,6 +104,7 @@ DROP TABLE t1; COMMIT; SET AUTOCOMMIT=ON; Performing State Transfer on a server that has been killed and restarted +connection node_1; CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; @@ -101,6 +114,7 @@ INSERT INTO t1 VALUES 
('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; +connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node2_committed_before'); @@ -110,6 +124,7 @@ INSERT INTO t1 VALUES ('node2_committed_before'); INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Killing server ... +connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node1_committed_during'); @@ -124,6 +139,7 @@ INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); @@ -131,6 +147,7 @@ INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +connection node_2; Performing --wsrep-recover ... Starting server ... Using --wsrep-start-position when starting mysqld ... 
@@ -142,6 +159,7 @@ INSERT INTO t1 VALUES ('node2_committed_after'); INSERT INTO t1 VALUES ('node2_committed_after'); INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; +connection node_1; INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); INSERT INTO t1 VALUES ('node1_to_be_committed_after'); @@ -156,6 +174,7 @@ INSERT INTO t1 VALUES ('node1_committed_after'); INSERT INTO t1 VALUES ('node1_committed_after'); INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; +connection node_1a_galera_st_kill_slave; INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); @@ -170,6 +189,7 @@ COUNT(*) = 0 1 COMMIT; SET AUTOCOMMIT=ON; +connection node_1; SELECT COUNT(*) = 35 FROM t1; COUNT(*) = 35 1 @@ -181,6 +201,7 @@ COMMIT; SET AUTOCOMMIT=ON; Performing State Transfer on a server that has been killed and restarted while a DDL was in progress on it +connection node_1; CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; @@ -189,6 +210,7 @@ INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); INSERT INTO t1 VALUES ('node1_committed_before'); +connection node_2; START TRANSACTION; INSERT INTO t1 VALUES ('node2_committed_before'); INSERT INTO t1 VALUES ('node2_committed_before'); @@ -197,9 +219,12 @@ INSERT INTO t1 VALUES ('node2_committed_before'); INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; SET GLOBAL debug_dbug = 'd,sync.alter_opened_table'; +connection node_1; ALTER TABLE t1 ADD COLUMN f2 INTEGER; +connection node_2; SET wsrep_sync_wait = 0; Killing server ... 
+connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 (f1) VALUES ('node1_committed_during'); @@ -214,6 +239,7 @@ INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); @@ -221,7 +247,9 @@ INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); +connection node_2; Performing --wsrep-recover ... +connection node_2; Starting server ... Using --wsrep-start-position when starting mysqld ... SET AUTOCOMMIT=OFF; @@ -232,6 +260,7 @@ INSERT INTO t1 (f1) VALUES ('node2_committed_after'); INSERT INTO t1 (f1) VALUES ('node2_committed_after'); INSERT INTO t1 (f1) VALUES ('node2_committed_after'); COMMIT; +connection node_1; INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); @@ -246,6 +275,7 @@ INSERT INTO t1 (f1) VALUES ('node1_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_committed_after'); INSERT INTO t1 (f1) VALUES ('node1_committed_after'); COMMIT; +connection node_1a_galera_st_kill_slave_ddl; INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); @@ -263,6 +293,7 @@ COUNT(*) = 0 1 COMMIT; SET AUTOCOMMIT=ON; +connection node_1; SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; COUNT(*) = 2 1 @@ -276,8 +307,10 @@ DROP 
TABLE t1; COMMIT; SET AUTOCOMMIT=ON; SET GLOBAL debug_dbug = $debug_orig; +connection node_1; CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; +connection node_2; CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); CALL mtr.add_suppression("Can't open and lock time zone table"); From 77867c147b8a278977203ad33d7daff0587a112a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Sat, 12 May 2018 10:28:54 +0300 Subject: [PATCH 54/54] dict_create_index_tree_in_mem(): Remove dead code In InnoDB, CREATE TEMPORARY TABLE does not allow FULLTEXT INDEX. Replace a condition with a debug assertion, and add a test. --- mysql-test/suite/innodb_fts/r/basic.result | 7 +++++++ mysql-test/suite/innodb_fts/t/basic.test | 8 ++++++++ storage/innobase/dict/dict0crea.cc | 8 ++------ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/mysql-test/suite/innodb_fts/r/basic.result b/mysql-test/suite/innodb_fts/r/basic.result index ae23b93dc84..d96127fbc34 100644 --- a/mysql-test/suite/innodb_fts/r/basic.result +++ b/mysql-test/suite/innodb_fts/r/basic.result @@ -1,3 +1,10 @@ +CREATE TEMPORARY TABLE articles ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), +body TEXT, +FULLTEXT (title,body) +) ENGINE=InnoDB; +ERROR HY000: Cannot create FULLTEXT index on temporary InnoDB table CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), diff --git a/mysql-test/suite/innodb_fts/t/basic.test b/mysql-test/suite/innodb_fts/t/basic.test index 58f36be08a5..0c0920c5f16 100644 --- a/mysql-test/suite/innodb_fts/t/basic.test +++ b/mysql-test/suite/innodb_fts/t/basic.test @@ -3,6 +3,14 @@ -- 
source include/have_innodb.inc # Create FTS table +--error ER_INNODB_NO_FT_TEMP_TABLE +CREATE TEMPORARY TABLE articles ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), + body TEXT, + FULLTEXT (title,body) + ) ENGINE=InnoDB; + CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index c1ee4f3bc4e..a83ee896972 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -943,14 +943,10 @@ dict_create_index_tree_in_mem( const trx_t* trx) /*!< in: InnoDB transaction handle */ { mtr_t mtr; - ulint page_no = FIL_NULL; + ulint page_no; ut_ad(mutex_own(&dict_sys->mutex)); - - if (index->type == DICT_FTS) { - /* FTS index does not need an index tree */ - return(DB_SUCCESS); - } + ut_ad(!(index->type & DICT_FTS)); mtr_start(&mtr); mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);