diff --git a/.clang-format b/.clang-format index 3b735b16d74..1ad93ead80a 100644 --- a/.clang-format +++ b/.clang-format @@ -1,18 +1,117 @@ -SpaceBeforeAssignmentOperators: false -SpaceAfterCStyleCast: true - -BreakBeforeBraces: Custom +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: true +BinPackParameters: true BraceWrapping: - AfterClass: true + AfterCaseLabel: true + AfterClass: true AfterControlStatement: true - AfterEnum: true - AfterFunction: true - AfterNamespace: true - AfterStruct: true - AfterUnion: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterStruct: true + AfterUnion: true AfterExternBlock: true - BeforeCatch: true - BeforeElse: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakStringLiterals: true +ColumnLimit: 79 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false 
+ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +... 
diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 46d5600acc3..52759f8583f 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -4644,8 +4644,16 @@ void do_perl(struct st_command *command) str_to_file(temp_file_path, ds_script.str, ds_script.length); + /* Use the same perl executable as the one that runs mysql-test-run.pl */ + const char *mtr_perl=getenv("MTR_PERL"); + if (!mtr_perl) + mtr_perl="perl"; + /* Format the "perl " command */ - my_snprintf(buf, sizeof(buf), "perl %s", temp_file_path); + if (strchr(mtr_perl, ' ')) + my_snprintf(buf, sizeof(buf), "\"%s\" %s", mtr_perl, temp_file_path); + else + my_snprintf(buf, sizeof(buf), "%s %s", mtr_perl, temp_file_path); if (!(res_file= my_popen(buf, "r"))) { diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index 7354dfcb656..b6f478b943b 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -101,6 +101,8 @@ ulong srv_page_size_shift; page_size_t univ_page_size(0, 0, false); /* Current page number (0 based). */ unsigned long long cur_page_num; +/* Current space. */ +unsigned long long cur_space; /* Skip the checksum verification. */ static bool no_check; /* Enabled for strict checksum verification. */ @@ -452,6 +454,27 @@ ulint read_file( return bytes; } +/** Check whether the page contains all zeroes. +@param[in] buf page +@param[in] size physical size of the page +@return true if the page is all zeroes; else false */ +static bool is_page_all_zeroes( + byte* buf, + ulint size) +{ + /* On pages that are not all zero, the page number + must match. */ + const ulint* p = reinterpret_cast(buf); + const ulint* const end = reinterpret_cast(buf + size); + do { + if (*p++) { + return false; + } + } while (p != end); + + return true; +} + /** Check if page is corrupted or not. 
@param[in] buf page frame @param[in] page_size page size @@ -463,10 +486,10 @@ ulint read_file( static bool is_page_corrupted( - byte* buf, + byte* buf, const page_size_t& page_size, - bool is_encrypted, - bool is_compressed) + bool is_encrypted, + bool is_compressed) { /* enable if page is corrupted. */ @@ -479,6 +502,24 @@ is_page_corrupted( ulint space_id = mach_read_from_4( buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + if (mach_read_from_4(buf + FIL_PAGE_OFFSET) != cur_page_num + || space_id != cur_space) { + /* On pages that are not all zero, the page number + must match. */ + if (is_page_all_zeroes(buf, page_size.physical())) { + return false; + } + + if (is_log_enabled) { + fprintf(log_file, + "page id mismatch space::" ULINTPF + " page::%llu \n", + space_id, cur_page_num); + } + + return true; + } + /* We can't trust only a page type, thus we take account also fsp_flags or crypt_data on page 0 */ if ((page_type == FIL_PAGE_PAGE_COMPRESSED && is_compressed) || @@ -1577,9 +1618,6 @@ int main( FILE* fil_page_type = NULL; fpos_t pos; - /* Use to check the space id of given file. If space_id is zero, - then check whether page is doublewrite buffer.*/ - ulint space_id = 0UL; /* enable when space_id of given file is zero. */ bool is_system_tablespace = false; @@ -1701,9 +1739,8 @@ int main( /* enable variable is_system_tablespace when space_id of given file is zero. Use to skip the checksum verification and rewrite for doublewrite pages. */ - is_system_tablespace = (!memcmp(&space_id, buf + - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4)) - ? 
true : false; + cur_space = mach_read_from_4(buf + FIL_PAGE_SPACE_ID); + cur_page_num = mach_read_from_4(buf + FIL_PAGE_OFFSET); /* Determine page size, zip_size and page compression from fsp_flags and encryption metadata from page 0 */ @@ -1715,7 +1752,9 @@ int main( physical_page_size = page_size.physical(); bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags); - if (page_size.physical() > UNIV_ZIP_SIZE_MIN) { + if (physical_page_size == UNIV_ZIP_SIZE_MIN) { + partial_page_read = false; + } else { /* Read rest of the page 0 to determine crypt_data */ bytes = read_file(buf, partial_page_read, page_size.physical(), fil_in); if (bytes != page_size.physical()) { @@ -1730,6 +1769,7 @@ int main( partial_page_read = false; } + /* Now that we have full page 0 in buffer, check encryption */ bool is_encrypted = check_encryption(filename, page_size, buf); @@ -1740,7 +1780,9 @@ int main( unsigned long long tmp_allow_mismatches = allow_mismatches; allow_mismatches = 0; - exit_status = verify_checksum(buf, page_size, is_encrypted, is_compressed, &mismatch_count); + exit_status = verify_checksum( + buf, page_size, is_encrypted, + is_compressed, &mismatch_count); if (exit_status) { fprintf(stderr, "Error: Page 0 checksum mismatch, can't continue. \n"); @@ -1804,6 +1846,36 @@ int main( } } + off_t cur_offset = 0; + /* Find the first non all-zero page and fetch the + space id from there. */ + while (is_page_all_zeroes(buf, physical_page_size)) { + bytes = ulong(read_file( + buf, false, physical_page_size, + fil_in)); + + if (feof(fil_in)) { + fprintf(stderr, "All are " + "zero-filled pages."); + goto my_exit; + } + + cur_offset++; + } + + cur_space = mach_read_from_4(buf + FIL_PAGE_SPACE_ID); + is_system_tablespace = (cur_space == 0); + + if (cur_offset > 0) { + /* Re-read the non-zero page to check the + checksum. So move the file pointer to + previous position and reset the page number too. 
*/ + cur_page_num = mach_read_from_4(buf + FIL_PAGE_OFFSET); + if (!start_page) { + goto first_non_zero; + } + } + /* seek to the necessary position */ if (start_page) { if (!read_from_stdin) { @@ -1901,6 +1973,7 @@ int main( goto my_exit; } +first_non_zero: if (is_system_tablespace) { /* enable when page is double write buffer.*/ skip_page = is_page_doublewritebuffer(buf); @@ -1921,8 +1994,10 @@ int main( checksum verification.*/ if (!no_check && !skip_page - && (exit_status = verify_checksum(buf, page_size, - is_encrypted, is_compressed, &mismatch_count))) { + && (exit_status = verify_checksum( + buf, page_size, + is_encrypted, is_compressed, + &mismatch_count))) { goto my_exit; } diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index c06d677786c..8775584bff0 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -3794,7 +3794,7 @@ open_or_create_log_file( fil_space_t* space, ulint i) /*!< in: log file number in group */ { - char name[10000]; + char name[FN_REFLEN]; ulint dirnamelen; os_normalize_path(srv_log_group_home_dir); diff --git a/include/maria.h b/include/maria.h index fbe0a70c698..880b7367c0b 100644 --- a/include/maria.h +++ b/include/maria.h @@ -1,5 +1,5 @@ /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. - Copyright (c) 2009, 2013, Monty Program Ab. + Copyright (c) 2009, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -286,7 +286,6 @@ extern int maria_close(MARIA_HA *file); extern int maria_delete(MARIA_HA *file, const uchar *buff); extern MARIA_HA *maria_open(const char *name, int mode, uint wait_if_locked); -extern MARIA_HA *maria_clone(struct st_maria_share *share, int mode); extern int maria_panic(enum ha_panic_function function); extern int maria_rfirst(MARIA_HA *file, uchar *buf, int inx); extern int maria_rkey(MARIA_HA *file, uchar *buf, int inx, diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm index 3f1e63bcfb9..44074d0838d 100644 --- a/mysql-test/lib/mtr_report.pm +++ b/mysql-test/lib/mtr_report.pm @@ -36,6 +36,21 @@ use POSIX qw[ _exit ]; use IO::Handle qw[ flush ]; use mtr_results; +use Term::ANSIColor; + +my %color_map = qw/pass green + retry-pass green + fail red + retry-fail red + disabled bright_black + skipped yellow + reset reset/; +sub xterm_color { + if (-t STDOUT and defined $ENV{TERM} and $ENV{TERM} =~ /xterm/) { + syswrite STDOUT, color($color_map{$_[0]}); + } +} + my $tot_real_time= 0; our $timestamp= 0; @@ -498,7 +513,16 @@ sub mtr_print (@) { sub mtr_report (@) { if (defined $verbose) { - print _name(). join(" ", @_). "\n"; + my @s = split /\[ (\S+) \]/, _name() . 
"@_\n"; + if (@s > 1) { + print $s[0]; + xterm_color($s[1]); + print "[ $s[1] ]"; + xterm_color('reset'); + print $s[2]; + } else { + print $s[0]; + } } } diff --git a/mysql-test/main/join_outer.result b/mysql-test/main/join_outer.result index 040fa137fb9..b02dcb963db 100644 --- a/mysql-test/main/join_outer.result +++ b/mysql-test/main/join_outer.result @@ -2511,6 +2511,40 @@ WHERE tb1.pk = 40 ORDER BY tb1.i1; v2 DROP TABLE t1,t2; +# +# MDEV-19790 : IS NOT TRUE / IS NOT FALSE predicates over +# inner tables of outer joins +# +create table t1 (a int); +create table t2 (b int); +insert into t1 values (3), (7), (1); +insert into t2 values (7), (4), (3); +select * from t1 left join t2 on a=b; +a b +3 3 +7 7 +1 NULL +select * from t1 left join t2 on a=b where (b > 3) is not true; +a b +3 3 +1 NULL +explain extended select * from t1 left join t2 on a=b where (b > 3) is not true; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`b` = `test`.`t1`.`a`) where `test`.`t2`.`b` > 3 is not true +select * from t1 left join t2 on a=b where (b > 3) is not false; +a b +7 7 +1 NULL +explain extended select * from t1 left join t2 on a=b where (b > 3) is not false; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 100.00 +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`b` = `test`.`t1`.`a`) where `test`.`t2`.`b` > 3 is not false +drop table t1,t2; # end of 5.5 tests # # MDEV-19258: chained right joins all converted to inner joins diff --git a/mysql-test/main/join_outer.test b/mysql-test/main/join_outer.test index 
c9ac0224745..c02b2e32580 100644 --- a/mysql-test/main/join_outer.test +++ b/mysql-test/main/join_outer.test @@ -2041,6 +2041,29 @@ ORDER BY tb1.i1; DROP TABLE t1,t2; +--echo # +--echo # MDEV-19790 : IS NOT TRUE / IS NOT FALSE predicates over +--echo # inner tables of outer joins +--echo # + +create table t1 (a int); +create table t2 (b int); +insert into t1 values (3), (7), (1); +insert into t2 values (7), (4), (3); +select * from t1 left join t2 on a=b; + +let $q= +select * from t1 left join t2 on a=b where (b > 3) is not true; +eval $q; +eval explain extended $q; + +let $q= +select * from t1 left join t2 on a=b where (b > 3) is not false; +eval $q; +eval explain extended $q; + +drop table t1,t2; + --echo # end of 5.5 tests --echo # diff --git a/mysql-test/main/join_outer_jcl6.result b/mysql-test/main/join_outer_jcl6.result index af4ab407f07..30981b2ff98 100644 --- a/mysql-test/main/join_outer_jcl6.result +++ b/mysql-test/main/join_outer_jcl6.result @@ -2522,6 +2522,40 @@ WHERE tb1.pk = 40 ORDER BY tb1.i1; v2 DROP TABLE t1,t2; +# +# MDEV-19790 : IS NOT TRUE / IS NOT FALSE predicates over +# inner tables of outer joins +# +create table t1 (a int); +create table t2 (b int); +insert into t1 values (3), (7), (1); +insert into t2 values (7), (4), (3); +select * from t1 left join t2 on a=b; +a b +7 7 +3 3 +1 NULL +select * from t1 left join t2 on a=b where (b > 3) is not true; +a b +3 3 +1 NULL +explain extended select * from t1 left join t2 on a=b where (b > 3) is not true; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 100.00 +1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.a 3 100.00 Using where; Using join buffer (flat, BNLH join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`b` = `test`.`t1`.`a` and `test`.`t1`.`a` is not null) where `test`.`t2`.`b` > 3 is not true +select * from t1 left join t2 on a=b where (b > 3) 
is not false; +a b +7 7 +1 NULL +explain extended select * from t1 left join t2 on a=b where (b > 3) is not false; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 100.00 +1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.a 3 100.00 Using where; Using join buffer (flat, BNLH join) +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t1` left join `test`.`t2` on(`test`.`t2`.`b` = `test`.`t1`.`a` and `test`.`t1`.`a` is not null) where `test`.`t2`.`b` > 3 is not false +drop table t1,t2; # end of 5.5 tests # # MDEV-19258: chained right joins all converted to inner joins diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 23b3712a094..56398cde2bd 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -371,6 +371,8 @@ main(); sub main { + $ENV{MTR_PERL}=$^X; + # Default, verbosity on report_option('verbose', 0); diff --git a/mysql-test/suite/encryption/r/debug_key_management.result b/mysql-test/suite/encryption/r/debug_key_management.result index 02e05b4d221..75e2629c1d6 100644 --- a/mysql-test/suite/encryption/r/debug_key_management.result +++ b/mysql-test/suite/encryption/r/debug_key_management.result @@ -3,6 +3,7 @@ show variables like 'innodb_encrypt%'; Variable_name Value innodb_encrypt_log ON innodb_encrypt_tables ON +innodb_encrypt_temporary_tables OFF innodb_encryption_rotate_key_age 2 innodb_encryption_rotation_iops 100 innodb_encryption_threads 4 diff --git a/mysql-test/suite/encryption/r/innochecksum.result b/mysql-test/suite/encryption/r/innochecksum.result index 59804f548ae..2a37ae1aa00 100644 --- a/mysql-test/suite/encryption/r/innochecksum.result +++ b/mysql-test/suite/encryption/r/innochecksum.result @@ -26,7 +26,7 @@ CREATE TABLE t6 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=InnoDB; # Run innochecksum on t2 # Run innochecksum on t3 # Run innochecksum on t6 -# no encryption corrupting the field should 
not have effect +# Space ID mismatch # Restore the original tables # Corrupt FIL_DATA+10 (data) # Run innochecksum on t2 diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_log.result b/mysql-test/suite/encryption/r/innodb_encrypt_log.result index 0663890c685..b999e8cb34a 100644 --- a/mysql-test/suite/encryption/r/innodb_encrypt_log.result +++ b/mysql-test/suite/encryption/r/innodb_encrypt_log.result @@ -22,6 +22,8 @@ key (col_int_key), key (col_char_key) ) ENGINE=InnoDB ENCRYPTED=YES ENCRYPTION_KEY_ID=1; CREATE TEMPORARY TABLE t LIKE t0; +Warnings: +Warning 1478 Ignoring encryption parameter during temporary table creation. INSERT INTO t VALUES (NULL,1,1,'private','secret'),(NULL,2,2,'sacred','success'), (NULL,3,3,'story','secure'),(NULL,4,4,'security','sacrament'); diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_temporary_tables.result b/mysql-test/suite/encryption/r/innodb_encrypt_temporary_tables.result new file mode 100644 index 00000000000..9a291ae1354 --- /dev/null +++ b/mysql-test/suite/encryption/r/innodb_encrypt_temporary_tables.result @@ -0,0 +1,19 @@ +SELECT variable_value into @old_encrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_encrypted'; +SELECT variable_value into @old_decrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_decrypted'; +CREATE TEMPORARY TABLE t1(f1 CHAR(200), f2 CHAR(200)) ENGINE=InnoDB; +INSERT INTO t1 (f1,f2) SELECT '', '' FROM seq_1_to_8192; +CREATE TEMPORARY TABLE t2(f1 CHAR(100), f2 CHAR(200), f3 CHAR(200))ENGINE=InnoDB; +INSERT INTO t2 (f1,f2,f3) SELECT '', '', '' FROM seq_1_to_8192; +SELECT COUNT(*) FROM t1; +COUNT(*) +8192 +SELECT variable_value > @old_encrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_encrypted'; +variable_value > @old_encrypted +1 +SELECT variable_value > @old_decrypted FROM information_schema.global_status +WHERE variable_name = 
'innodb_encryption_n_temp_blocks_decrypted'; +variable_value > @old_decrypted +1 diff --git a/mysql-test/suite/encryption/r/innodb_encryption.result b/mysql-test/suite/encryption/r/innodb_encryption.result index 559430e0210..9fa422bfd91 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption.result +++ b/mysql-test/suite/encryption/r/innodb_encryption.result @@ -3,6 +3,7 @@ SHOW VARIABLES LIKE 'innodb_encrypt%'; Variable_name Value innodb_encrypt_log ON innodb_encrypt_tables ON +innodb_encrypt_temporary_tables OFF innodb_encryption_rotate_key_age 15 innodb_encryption_rotation_iops 100 innodb_encryption_threads 4 @@ -57,6 +58,7 @@ SHOW VARIABLES LIKE 'innodb_encrypt%'; Variable_name Value innodb_encrypt_log ON innodb_encrypt_tables OFF +innodb_encrypt_temporary_tables OFF innodb_encryption_rotate_key_age 15 innodb_encryption_rotation_iops 100 innodb_encryption_threads 0 diff --git a/mysql-test/suite/encryption/t/innochecksum.test b/mysql-test/suite/encryption/t/innochecksum.test index f57b8d265bb..06d3ff46b81 100644 --- a/mysql-test/suite/encryption/t/innochecksum.test +++ b/mysql-test/suite/encryption/t/innochecksum.test @@ -193,7 +193,8 @@ EOF --exec $INNOCHECKSUM $t3_IBD --echo # Run innochecksum on t6 ---echo # no encryption corrupting the field should not have effect +--echo # Space ID mismatch +--error 1 --exec $INNOCHECKSUM $t6_IBD --enable_result_log diff --git a/mysql-test/suite/encryption/t/innodb-encryption-alter.test b/mysql-test/suite/encryption/t/innodb-encryption-alter.test index d9a0e4e95cd..2e18b15079b 100644 --- a/mysql-test/suite/encryption/t/innodb-encryption-alter.test +++ b/mysql-test/suite/encryption/t/innodb-encryption-alter.test @@ -123,6 +123,7 @@ SET DEBUG_SYNC = 'now WAIT_FOR done'; SET GLOBAL innodb_flush_log_at_trx_commit=1; COMMIT; +let $restart_parameters = --innodb_encryption_threads=2; --let $shutdown_timeout= 0 --source include/restart_mysqld.inc disconnect con1; diff --git 
a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt new file mode 100644 index 00000000000..70797302d01 --- /dev/null +++ b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt @@ -0,0 +1,2 @@ +--innodb_buffer_pool_size=5M +--innodb_encrypt_temporary_tables=1 diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.test b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.test new file mode 100644 index 00000000000..6c16afa28f4 --- /dev/null +++ b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.test @@ -0,0 +1,23 @@ +--source include/have_sequence.inc +--source include/have_innodb.inc +--source include/have_file_key_management_plugin.inc + +SELECT variable_value into @old_encrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_encrypted'; + +SELECT variable_value into @old_decrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_decrypted'; + +CREATE TEMPORARY TABLE t1(f1 CHAR(200), f2 CHAR(200)) ENGINE=InnoDB; +INSERT INTO t1 (f1,f2) SELECT '', '' FROM seq_1_to_8192; + +CREATE TEMPORARY TABLE t2(f1 CHAR(100), f2 CHAR(200), f3 CHAR(200))ENGINE=InnoDB; +INSERT INTO t2 (f1,f2,f3) SELECT '', '', '' FROM seq_1_to_8192; + +SELECT COUNT(*) FROM t1; +SELECT variable_value > @old_encrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_encrypted'; + +SELECT variable_value > @old_decrypted FROM information_schema.global_status +WHERE variable_name = 'innodb_encryption_n_temp_blocks_decrypted'; +--source include/restart_mysqld.inc diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index a89109f6291..1e6a89bfabf 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -16,6 +16,7 @@ MW-328B : MDEV-17847 Galera test failure 
on MW-328[A|B|C] MW-328C : MDEV-17847 Galera test failure on MW-328[A|B|C] MW-329 : wsrep_local_replays not stable MW-336 : MDEV-19746 Galera test failures because of wsrep_slave_threads identification +MW-388: MDEV-19803 Long semaphore wait error on galera.MW-388 MW-416 : MDEV-13549 Galera test failures MW-44 : MDEV-15809 Test failure on galera.MW-44 galera_account_management : MariaDB 10.0 does not support ALTER USER diff --git a/mysql-test/suite/innodb/r/alter_missing_tablespace.result b/mysql-test/suite/innodb/r/alter_missing_tablespace.result index 237d0df26ff..f9db353af3c 100644 --- a/mysql-test/suite/innodb/r/alter_missing_tablespace.result +++ b/mysql-test/suite/innodb/r/alter_missing_tablespace.result @@ -8,23 +8,23 @@ CREATE TABLE `x..d` (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; CREATE TABLE t1(a SERIAL)ENGINE=InnoDB; INSERT INTO t1 VALUES(1),(2),(3); SELECT * FROM t; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB ALTER TABLE t ADD INDEX (a), ALGORITHM=INPLACE; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB SHOW WARNINGS; Level Code Message Warning 1812 Tablespace is missing for table 'test/t' -Error 1932 Table 'test.t' doesn't exist in engine +Error 1030 Got error 194 "Tablespace is missing for a table" from storage engine InnoDB ALTER TABLE t ADD INDEX (a), ALGORITHM=COPY; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB SHOW WARNINGS; Level Code Message Warning 1812 Tablespace is missing for table 'test/t' -Error 1932 Table 'test.t' doesn't exist in engine +Error 1030 Got error 194 "Tablespace is missing for a table" from storage engine InnoDB ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=INPLACE; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 
194 "Tablespace is missing for a table" from storage engine InnoDB ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=COPY; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB ALTER TABLE t ALGORITHM=INPLACE, DISCARD TABLESPACE; ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'DISCARD TABLESPACE' at line 1 ALTER TABLE t ALGORITHM=COPY, DISCARD TABLESPACE; @@ -37,7 +37,7 @@ Warning 1812 Tablespace is missing for table 'test/t' Warning 1812 Tablespace is missing for table 'test/t' DROP TABLE t; SELECT * FROM `x..d`; -ERROR 42S02: Table 'test.x..d' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB DROP TABLE `x..d`; ALTER TABLE t1 DISCARD TABLESPACE; ALTER TABLE t1 AUTO_INCREMENT=1, ALGORITHM=INPLACE; diff --git a/mysql-test/suite/innodb/r/blob-crash.result b/mysql-test/suite/innodb/r/blob-crash.result new file mode 100644 index 00000000000..85d12ff49b5 --- /dev/null +++ b/mysql-test/suite/innodb/r/blob-crash.result @@ -0,0 +1,149 @@ +# +# Bug #16963396 INNODB: USE OF LARGE EXTERNALLY-STORED FIELDS MAKES +# CRASH RECOVERY LOSE DATA +# +# +# Uncompressed Table - Insert Operation - Crash Test +# Fresh insert with blobs +# +CREATE TABLE t1 (a BIGINT PRIMARY KEY, b LONGBLOB) ENGINE=InnoDB; +INSERT INTO t1 (a, b) VALUES (1, repeat('^', 40000)); +INSERT INTO t1 (a, b) VALUES (2, '2'); +INSERT INTO t1 (a, b) VALUES (3, '3'); +INSERT INTO t1 (a, b) VALUES (4, '4'); +INSERT INTO t1 (a, b) VALUES (5, '5'); +begin; +INSERT INTO t1 (a, b) VALUES (6, REPEAT('a', 4*1024*1024)); +SELECT a, right(b, 50) FROM t1; +a right(b, 50) +1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2 2 +3 3 +4 4 +5 5 +# +# Uncompressed Table - UPDATE Operation - Crash Test +# Update of non-blob column so that blob is needed. 
+# +begin; +UPDATE t1 set b = REPEAT('a', 4*1024*1024) where a = 5 ; +SELECT a, right(b, 50) FROM t1; +a right(b, 50) +1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2 2 +3 3 +4 4 +5 5 +# +# Uncompressed Table - UPDATE Operation - Crash Test +# Update of blob column to blob. +# +connect con1,localhost,root,,; +begin; +UPDATE t1 set b = REPEAT('$', 50000) where a = 1; +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +UPDATE t1 SET b='five' WHERE a=5; +disconnect con1; +SELECT a, right(b, 50) FROM t1; +a right(b, 50) +1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +2 2 +3 3 +4 4 +5 five +# +# Uncompressed Table - Rollback of UPDATE operation +# Update moves offpage data to inline data. +# +create table t2 (f1 bigint primary key, f2 longblob, f3 longblob, +index(f2(10), f3(10))) engine=innodb; +insert into t2 values (10, repeat('.', 40000), repeat('?', 40000)); +connect con1,localhost,root,,; +begin; +update t2 set f2 = '='; +select f1, right(f2, 20), right(f3, 20) from t2; +f1 right(f2, 20) right(f3, 20) +10 = ???????????????????? +update t2 set f3 = '&'; +select f1, right(f2, 20), right(f3, 20) from t2; +f1 right(f2, 20) right(f3, 20) +10 = & +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=1; +disconnect con1; +select f1, right(f2, 20), right(f3, 20) from t2; +f1 right(f2, 20) right(f3, 20) +10 .................... ???????????????????? 
+check table t2; +Table Op Msg_type Msg_text +test.t2 check status OK +drop table t2; +# +# Compressed Table - Insert Operation - Crash Test +# fresh insert with BLOBs +# +set global innodb_compression_level = 0; +create table t3 (f1 bigint primary key, f2 longblob, f3 longblob, +index(f2(10), f3(10))) engine=innodb row_format=compressed; +connect con1,localhost,root,,; +begin; +INSERT INTO t3 (f1, f2, f3) VALUES (6, repeat('/', 40000), repeat('<', 40000)); +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=2; +disconnect con1; +select f1, length(f2), length(f3) from t3; +f1 length(f2) length(f3) +select f1, right(f2, 30), right(f3, 20) from t3; +f1 right(f2, 30) right(f3, 20) +check table t3; +Table Op Msg_type Msg_text +test.t3 check status OK +# +# Compressed Table - Update Operation - Crash Test +# update of a non-BLOB column so that BLOB is needed +# +set global innodb_compression_level = 0; +insert into t3 values (2, repeat('!', 30), repeat('+', 30)); +connect con1,localhost,root,,; +begin; +UPDATE t3 set f2 = repeat('>', 40000) where f1 = 2; +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=3; +disconnect con1; +select f1, length(f2), length(f3) from t3; +f1 length(f2) length(f3) +2 30 30 +select f1, right(f2, 30), right(f3, 20) from t3; +f1 right(f2, 30) right(f3, 20) +2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
++++++++++++++++++++ +check table t3; +Table Op Msg_type Msg_text +test.t3 check status OK +# +# Compressed Table - Update Operation - Crash Test +# update blob to blob +# +set global innodb_compression_level = 0; +insert into t3 values (3, repeat('%', 40000), repeat('~', 40000)); +connect con1,localhost,root,,; +begin; +UPDATE t3 set f2 = concat(f2, repeat(',', 10)) where f1 = 3; +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=4; +select f1, length(f2), length(f3) from t3; +f1 length(f2) length(f3) +2 30 30 +3 40000 40000 +select f1, right(f2, 30), right(f3, 20) from t3; +f1 right(f2, 30) right(f3, 20) +2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ++++++++++++++++++++ +3 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ~~~~~~~~~~~~~~~~~~~~ +check table t3; +Table Op Msg_type Msg_text +test.t3 check status OK +DROP TABLE t1,t3; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_dump_pct.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_dump_pct.result new file mode 100644 index 00000000000..d9f5e4dfeed --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_dump_pct.result @@ -0,0 +1,22 @@ +CREATE TABLE tab5 (col1 int auto_increment primary key, +col2 VARCHAR(25), col3 varchar(25)) ENGINE=InnoDB; +CREATE INDEX idx1 ON tab5(col2(10)); +CREATE INDEX idx2 ON tab5(col3(10)); +SET GLOBAL innodb_buffer_pool_filename=ib_buffer_pool100; +SET GLOBAL innodb_buffer_pool_dump_pct=100; +SELECT variable_value INTO @IBPDS +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS'; +SET GLOBAL innodb_buffer_pool_dump_now=ON; +SET GLOBAL innodb_buffer_pool_filename=ib_buffer_pool1; +SET GLOBAL innodb_buffer_pool_dump_pct=1; +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +1 +SELECT variable_value INTO @IBPDS +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS'; +SET GLOBAL innodb_buffer_pool_dump_now=ON; +SET GLOBAL 
innodb_buffer_pool_dump_pct=DEFAULT; +SET GLOBAL innodb_buffer_pool_filename=DEFAULT; +DROP TABLE tab5; diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result index 3663c18ea44..d7a78f8ec90 100644 --- a/mysql-test/suite/innodb/r/innodb_mysql.result +++ b/mysql-test/suite/innodb/r/innodb_mysql.result @@ -3130,8 +3130,49 @@ ERROR 22007: Incorrect datetime value: '' DROP TABLE t1; SET SQL_MODE=DEFAULT; # -# Bug#56862 Moved to innodb_16k.test +# Bug#56862 Execution of a query that uses index merge returns a wrong result # +CREATE TABLE t1 ( +pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, +a int, +b int, +INDEX idx(a)) +ENGINE=INNODB; +INSERT INTO t1(a,b) VALUES +(11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), +(3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), +(6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), +(13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); +set @optimizer_switch_saved=@@optimizer_switch; +SET SESSION optimizer_switch='derived_merge=off'; +SET SESSION sort_buffer_size = 1024*36; +EXPLAIN +SELECT COUNT(*) FROM +(SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL NULL NULL 1537 +2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 1537 Using sort_union(idx,PRIMARY); Using where +SELECT COUNT(*) FROM +(SELECT * FROM t1 FORCE INDEX 
(idx,PRIMARY) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +COUNT(*) +1537 +set @@optimizer_switch=@optimizer_switch_saved; +SET SESSION sort_buffer_size = DEFAULT; +DROP TABLE t1; # # Test for bug #39932 "create table fails if column for FK is in different # case than in corr index". diff --git a/mysql-test/suite/innodb/r/missing_tablespaces.result b/mysql-test/suite/innodb/r/missing_tablespaces.result new file mode 100644 index 00000000000..11b79273498 --- /dev/null +++ b/mysql-test/suite/innodb/r/missing_tablespaces.result @@ -0,0 +1,11 @@ +# +# Bug#19419026 WHEN A TABLESPACE IS NOT FOUND, DO NOT REPORT "TABLE NOT FOUND" +# +CREATE DATABASE `..................................................`; +USE `..................................................`; +CREATE TABLE `..................................................` (ID INT) +ENGINE=INNODB; +select * from `..................................................`; +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB +DROP TABLE `..................................................`; +DROP DATABASE `..................................................`; diff --git a/mysql-test/suite/innodb/r/page_id_innochecksum.result b/mysql-test/suite/innodb/r/page_id_innochecksum.result new file mode 100644 index 00000000000..e2c13442fe6 --- /dev/null +++ b/mysql-test/suite/innodb/r/page_id_innochecksum.result @@ -0,0 +1,6 @@ +# Set the environmental variables +create table t1(f1 int not null)engine=innodb; +insert into t1 values(1), (2), (3); +# Change the page offset +FOUND 1 /page id mismatch/ in result.log +drop table t1; diff --git a/mysql-test/suite/innodb/r/purge.result b/mysql-test/suite/innodb/r/purge.result new file mode 100644 index 00000000000..fe10ca1f4d5 --- /dev/null +++ b/mysql-test/suite/innodb/r/purge.result @@ -0,0 +1,121 @@ +SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency; +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +# Bug #12429576 - Test an 
assertion failure on purge. +CREATE TABLE t1_purge ( +A int, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t1_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); +CREATE TABLE t2_purge ( +A int PRIMARY KEY, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, I blob, +J blob, K blob, L blob, +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t2_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), +REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); +CREATE TABLE t3_purge ( +A int, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t3_purge SELECT * FROM t1_purge; +CREATE TABLE t4_purge ( +A int PRIMARY KEY, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), I varchar(800), +J varchar(800), K varchar(800), L varchar(800), +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t4_purge SELECT * FROM t2_purge; +DELETE FROM t1_purge; +DELETE FROM t2_purge; +DELETE FROM t3_purge; +DELETE FROM t4_purge; +SET @r=REPEAT('a',500); +CREATE TABLE t12637786(a int, +v1 varchar(500), v2 varchar(500), v3 varchar(500), +v4 varchar(500), v5 varchar(500), v6 varchar(500), +v7 varchar(500), v8 varchar(500), v9 varchar(500), +v10 varchar(500), v11 varchar(500), v12 varchar(500), +v13 varchar(500), v14 varchar(500), v15 varchar(500), +v16 varchar(500), v17 varchar(500), v18 varchar(500) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +CREATE INDEX idx1 ON t12637786(a,v1); +INSERT INTO t12637786 
VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +UPDATE t12637786 SET a=1000; +DELETE FROM t12637786; +# Bug#12963823 - Test that the purge thread does not crash when +CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, +i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) +ENGINE=innodb ROW_FORMAT=dynamic; +SET @r = REPEAT('a', 767); +INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); +CREATE INDEX ndx_a ON t12963823 (a(500)); +CREATE INDEX ndx_b ON t12963823 (b(500)); +CREATE INDEX ndx_c ON t12963823 (c(500)); +CREATE INDEX ndx_d ON t12963823 (d(500)); +CREATE INDEX ndx_e ON t12963823 (e(500)); +CREATE INDEX ndx_f ON t12963823 (f(500)); +CREATE INDEX ndx_k ON t12963823 (k(500)); +CREATE INDEX ndx_l ON t12963823 (l(500)); +SET @r = REPEAT('b', 500); +UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; +UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; +UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; +UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; +ALTER TABLE t12963823 DROP INDEX ndx_a; +ALTER TABLE t12963823 DROP INDEX ndx_b; +CREATE INDEX ndx_g ON t12963823 (g(500)); +CREATE INDEX ndx_h ON t12963823 (h(500)); +CREATE INDEX ndx_i ON t12963823 (i(500)); +CREATE INDEX ndx_j ON t12963823 (j(500)); +CREATE INDEX ndx_m ON t12963823 (m(500)); +CREATE INDEX ndx_n ON t12963823 (n(500)); +CREATE INDEX ndx_o ON t12963823 (o(500)); +CREATE INDEX ndx_p ON t12963823 (p(500)); +SHOW CREATE TABLE t12963823; +Table Create Table +t12963823 CREATE TABLE `t12963823` ( + `a` blob DEFAULT NULL, + `b` blob DEFAULT NULL, + `c` blob DEFAULT NULL, + `d` blob DEFAULT NULL, + `e` blob DEFAULT NULL, + `f` blob DEFAULT NULL, + `g` blob DEFAULT NULL, + `h` blob DEFAULT NULL, + `i` blob DEFAULT NULL, + `j` blob DEFAULT NULL, + `k` blob DEFAULT NULL, + `l` blob DEFAULT NULL, + `m` blob DEFAULT NULL, + `n` blob DEFAULT NULL, + `o` blob DEFAULT NULL, + `p` blob DEFAULT NULL, + KEY `ndx_c` (`c`(500)), + KEY `ndx_d` (`d`(500)), + KEY `ndx_e` 
(`e`(500)), + KEY `ndx_f` (`f`(500)), + KEY `ndx_k` (`k`(500)), + KEY `ndx_l` (`l`(500)), + KEY `ndx_g` (`g`(500)), + KEY `ndx_h` (`h`(500)), + KEY `ndx_i` (`i`(500)), + KEY `ndx_j` (`j`(500)), + KEY `ndx_m` (`m`(500)), + KEY `ndx_n` (`n`(500)), + KEY `ndx_o` (`o`(500)), + KEY `ndx_p` (`p`(500)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +InnoDB 0 transactions not purged +DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge, t12637786, t12963823; +SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; diff --git a/mysql-test/suite/innodb/r/truncate_missing.result b/mysql-test/suite/innodb/r/truncate_missing.result index 263880eccd2..423dc9eb537 100644 --- a/mysql-test/suite/innodb/r/truncate_missing.result +++ b/mysql-test/suite/innodb/r/truncate_missing.result @@ -11,7 +11,7 @@ t CREATE TABLE `t` ( UNIQUE KEY `a` (`a`) ) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1 SELECT * FROM t; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB TRUNCATE TABLE t; -ERROR 42S02: Table 'test.t' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/alter_missing_tablespace.test b/mysql-test/suite/innodb/t/alter_missing_tablespace.test index bdcbdfb4408..287aa437d10 100644 --- a/mysql-test/suite/innodb/t/alter_missing_tablespace.test +++ b/mysql-test/suite/innodb/t/alter_missing_tablespace.test @@ -35,22 +35,21 @@ INSERT INTO t1 VALUES(1),(2),(3); --source include/start_mysqld.inc -# The ER_NO_SUCH_TABLE is being thrown by ha_innobase::open(). # The table does exist, only the tablespace does not exist. 
---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO SELECT * FROM t; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO ALTER TABLE t ADD INDEX (a), ALGORITHM=INPLACE; SHOW WARNINGS; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO ALTER TABLE t ADD INDEX (a), ALGORITHM=COPY; SHOW WARNINGS; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=INPLACE; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO ALTER TABLE t AUTO_INCREMENT=1, ALGORITHM=COPY; --error ER_PARSE_ERROR @@ -61,7 +60,7 @@ ALTER TABLE t ALGORITHM=COPY, DISCARD TABLESPACE; ALTER TABLE t ALGORITHM=DEFAULT, DISCARD TABLESPACE; ALTER TABLE t DISCARD TABLESPACE; DROP TABLE t; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO SELECT * FROM `x..d`; DROP TABLE `x..d`; diff --git a/mysql-test/suite/innodb/t/blob-crash.test b/mysql-test/suite/innodb/t/blob-crash.test new file mode 100644 index 00000000000..b36c9318e27 --- /dev/null +++ b/mysql-test/suite/innodb/t/blob-crash.test @@ -0,0 +1,208 @@ +--source include/maybe_debug.inc +--source include/innodb_page_size_small.inc + +--echo # +--echo # Bug #16963396 INNODB: USE OF LARGE EXTERNALLY-STORED FIELDS MAKES +--echo # CRASH RECOVERY LOSE DATA +--echo # + +# .......................................................................... + +--echo # +--echo # Uncompressed Table - Insert Operation - Crash Test +--echo # Fresh insert with blobs +--echo # + +CREATE TABLE t1 (a BIGINT PRIMARY KEY, b LONGBLOB) ENGINE=InnoDB; + +# Insert a few rows (it doesn't really matter how many). These transactions +# are committed once they are acked, so they should not be lost. +INSERT INTO t1 (a, b) VALUES (1, repeat('^', 40000)); +INSERT INTO t1 (a, b) VALUES (2, '2'); +INSERT INTO t1 (a, b) VALUES (3, '3'); +INSERT INTO t1 (a, b) VALUES (4, '4'); +INSERT INTO t1 (a, b) VALUES (5, '5'); + +# The BLOB insert will fail, and should disappear. 
However no data committed +# up to this point should be lost. +begin; +INSERT INTO t1 (a, b) VALUES (6, REPEAT('a', 4*1024*1024)); + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +SELECT a, right(b, 50) FROM t1; + +# .......................................................................... + +--echo # +--echo # Uncompressed Table - UPDATE Operation - Crash Test +--echo # Update of non-blob column so that blob is needed. +--echo # + +# The BLOB update will fail, and should disappear. However no data committed +# up to this point should be lost. +begin; +UPDATE t1 set b = REPEAT('a', 4*1024*1024) where a = 5 ; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +SELECT a, right(b, 50) FROM t1; + +# .......................................................................... + +--echo # +--echo # Uncompressed Table - UPDATE Operation - Crash Test +--echo # Update of blob column to blob. +--echo # + +# The BLOB update will fail, and should disappear. However no data committed +# up to this point should be lost. +connect (con1,localhost,root,,); +begin; +UPDATE t1 set b = REPEAT('$', 50000) where a = 1; + +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +UPDATE t1 SET b='five' WHERE a=5; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +disconnect con1; + +SELECT a, right(b, 50) FROM t1; + +# .......................................................................... + +--echo # +--echo # Uncompressed Table - Rollback of UPDATE operation +--echo # Update moves offpage data to inline data. 
+--echo # + +create table t2 (f1 bigint primary key, f2 longblob, f3 longblob, + index(f2(10), f3(10))) engine=innodb; + +insert into t2 values (10, repeat('.', 40000), repeat('?', 40000)); + +connect (con1,localhost,root,,); +begin; +update t2 set f2 = '='; +select f1, right(f2, 20), right(f3, 20) from t2; +update t2 set f3 = '&'; +select f1, right(f2, 20), right(f3, 20) from t2; + +if ($have_debug) { +--disable_query_log +set DEBUG_SYNC='blob_rollback_middle SIGNAL stuck WAIT_FOR ever'; +send ROLLBACK; +--enable_query_log +} + +connection default; +if ($have_debug) { +--disable_query_log +SET DEBUG_SYNC = 'now WAIT_FOR stuck'; +--enable_query_log +} +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=1; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +disconnect con1; + +select f1, right(f2, 20), right(f3, 20) from t2; +check table t2; +drop table t2; + +# .......................................................................... + +--echo # +--echo # Compressed Table - Insert Operation - Crash Test +--echo # fresh insert with BLOBs +--echo # + +set global innodb_compression_level = 0; + +create table t3 (f1 bigint primary key, f2 longblob, f3 longblob, + index(f2(10), f3(10))) engine=innodb row_format=compressed; + +# The BLOB insert will fail, and should disappear. However no data committed +# up to this point should be lost. +connect (con1,localhost,root,,); +begin; +INSERT INTO t3 (f1, f2, f3) VALUES (6, repeat('/', 40000), repeat('<', 40000)); + +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=2; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +disconnect con1; + +select f1, length(f2), length(f3) from t3; +select f1, right(f2, 30), right(f3, 20) from t3; +check table t3; + +# .......................................................................... 
+ +--echo # +--echo # Compressed Table - Update Operation - Crash Test +--echo # update of a non-BLOB column so that BLOB is needed +--echo # + +set global innodb_compression_level = 0; +insert into t3 values (2, repeat('!', 30), repeat('+', 30)); + +# The BLOB update will fail, and should disappear. However no data committed +# up to this point should be lost. +connect (con1,localhost,root,,); +begin; +UPDATE t3 set f2 = repeat('>', 40000) where f1 = 2; + +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=3; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +disconnect con1; + +select f1, length(f2), length(f3) from t3; +select f1, right(f2, 30), right(f3, 20) from t3; +check table t3; + +# .......................................................................... + +--echo # +--echo # Compressed Table - Update Operation - Crash Test +--echo # update blob to blob +--echo # + +set global innodb_compression_level = 0; +insert into t3 values (3, repeat('%', 40000), repeat('~', 40000)); + +# The BLOB update will fail, and should disappear. However no data committed +# up to this point should be lost. 
+connect (con1,localhost,root,,); +begin; +UPDATE t3 set f2 = concat(f2, repeat(',', 10)) where f1 = 3; + +connection default; +SET GLOBAL innodb_flush_log_at_trx_commit=1; +DELETE FROM t1 WHERE a=4; + +let $shutdown_timeout=0; +--source include/restart_mysqld.inc + +select f1, length(f2), length(f3) from t3; +select f1, right(f2, 30), right(f3, 20) from t3; +check table t3; + +DROP TABLE t1,t3; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_dump_pct.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_dump_pct.test new file mode 100644 index 00000000000..a7a414d61da --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_dump_pct.test @@ -0,0 +1,99 @@ +# ************************************************************ +# wl6504: This testcase is to check the functionality of the +# innodb_buffer_pool_dump_pct flag +# step 1.Set innodb_buffer_pool_dump_pct=100 and take the dump +# step 2.Set innodb_buffer_pool_dump_pct=1 and take the dump +# step 3.Compare the size of both the dump files +#************************************************************* +--source include/have_innodb.inc +--source include/have_innodb_16k.inc + +let MYSQLD_DATADIR = `SELECT @@datadir`; + +CREATE TABLE tab5 (col1 int auto_increment primary key, +col2 VARCHAR(25), col3 varchar(25)) ENGINE=InnoDB; +CREATE INDEX idx1 ON tab5(col2(10)); +CREATE INDEX idx2 ON tab5(col3(10)); + +SET GLOBAL innodb_buffer_pool_filename=ib_buffer_pool100; +SET GLOBAL innodb_buffer_pool_dump_pct=100; + +#*********************************************************** +# SELECT * +# FROM information_schema.global_status +# WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS' +# gives +# a) VARIABLE_NAME VARIABLE_VALUE +# INNODB_BUFFER_POOL_DUMP_STATUS not started +# in case there was no innodb_buffer_pool_dump since server start. 
+# b) Something like +# VARIABLE_NAME VARIABLE_VALUE +# INNODB_BUFFER_POOL_DUMP_STATUS Buffer pool(s) dump completed at 130711 13:43:24 +# in case there was an innodb_buffer_pool_dump since server start. +# Attention: +# - There is no guarantee that the current test is the first test which +# made an innodb_buffer_pool_dump since server startup. +# - The granularity of the timestamp is one second. +# - There could have been some dump caused by some previous test +# just a few milliseconds before. +# In order to avoid conflict with previous tests, read the current value +# of INNODB_BUFFER_POOL_DUMP_STATUS +# and confirm that the timestamp is different after the dump +#*********************************************************** + +# Read the current value to compare with the new value. +SELECT variable_value INTO @IBPDS +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS'; +SET GLOBAL innodb_buffer_pool_dump_now=ON; + +# Sleep one second in order to ensure that the time stamp is +# different at next dump +--sleep 1 + +let $wait_condition = SELECT count(*) = 1 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS' +AND variable_value != @IBPDS +AND variable_value like 'Buffer pool(s) dump completed at%'; +--source include/wait_condition.inc + +--file_exists $MYSQLD_DATADIR/ib_buffer_pool100 +SET GLOBAL innodb_buffer_pool_filename=ib_buffer_pool1; +SET GLOBAL innodb_buffer_pool_dump_pct=1; +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Read the current value to compare with the new value.
+--disable_warnings +SELECT variable_value INTO @IBPDS +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS'; +--enable_warnings + +SET GLOBAL innodb_buffer_pool_dump_now=ON; + +# Sleep one second in order to ensure that the time stamp is +# different at next dump +--sleep 1 + +let $wait_condition = SELECT count(*) = 1 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS' +AND variable_value != @IBPDS +AND variable_value like 'Buffer pool(s) dump completed at%'; +--source include/wait_condition.inc + +--file_exists $MYSQLD_DATADIR/ib_buffer_pool1 + +perl; +my $size1 = -s "$ENV{MYSQLD_DATADIR}/ib_buffer_pool1"; +my $size100 = -s "$ENV{MYSQLD_DATADIR}/ib_buffer_pool100"; +die "$size100<=$size1\n" unless $size100 > $size1; +EOF + +SET GLOBAL innodb_buffer_pool_dump_pct=DEFAULT; +SET GLOBAL innodb_buffer_pool_filename=DEFAULT; + +--remove_file $MYSQLD_DATADIR/ib_buffer_pool100 +--remove_file $MYSQLD_DATADIR/ib_buffer_pool1 +DROP TABLE tab5; diff --git a/mysql-test/suite/innodb/t/innodb_mysql.test b/mysql-test/suite/innodb/t/innodb_mysql.test index dc247a96468..23bba81c6bd 100644 --- a/mysql-test/suite/innodb/t/innodb_mysql.test +++ b/mysql-test/suite/innodb/t/innodb_mysql.test @@ -818,8 +818,52 @@ DROP TABLE t1; SET SQL_MODE=DEFAULT; --echo # ---echo # Bug#56862 Moved to innodb_16k.test +--echo # Bug#56862 Execution of a query that uses index merge returns a wrong result --echo # + +CREATE TABLE t1 ( + pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, + a int, + b int, + INDEX idx(a)) +ENGINE=INNODB; + +INSERT INTO t1(a,b) VALUES + (11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), + (3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), + (6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), + (13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 
FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); + +set @optimizer_switch_saved=@@optimizer_switch; +SET SESSION optimizer_switch='derived_merge=off'; +SET SESSION sort_buffer_size = 1024*36; + +EXPLAIN +SELECT COUNT(*) FROM + (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +SELECT COUNT(*) FROM + (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +set @@optimizer_switch=@optimizer_switch_saved; +SET SESSION sort_buffer_size = DEFAULT; + +DROP TABLE t1; + --echo # --echo # Test for bug #39932 "create table fails if column for FK is in different --echo # case than in corr index". diff --git a/mysql-test/suite/innodb/t/missing_tablespaces.test b/mysql-test/suite/innodb/t/missing_tablespaces.test new file mode 100644 index 00000000000..98efc51d7b8 --- /dev/null +++ b/mysql-test/suite/innodb/t/missing_tablespaces.test @@ -0,0 +1,36 @@ +--source include/have_innodb.inc + +#Restarting not supported in embedded +--source include/not_embedded.inc +#Windows has trouble creating files/directories with long names +--source include/not_windows.inc + +--echo # +--echo # Bug#19419026 WHEN A TABLESPACE IS NOT FOUND, DO NOT REPORT "TABLE NOT FOUND" +--echo # + +#Create database and tablename with all special characters + +CREATE DATABASE `..................................................`; +USE `..................................................`; +CREATE TABLE `..................................................` (ID INT) +ENGINE=INNODB; + +--source include/shutdown_mysqld.inc + +--remove_file 
$MYSQLTEST_VARDIR/mysqld.1/data/@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e/@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e@002e.ibd + +--source include/start_mysqld.inc + +--disable_query_log +call mtr.add_suppression("\\[ERROR\\] InnoDB: Operating system error number 2 in a file operation."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: If you are installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot open datafile for read-only"); +call mtr.add_suppression("\\[Warning\\] InnoDB: Ignoring tablespace .* because it could not be opened"); +--enable_query_log + +--error ER_GET_ERRNO +select * from `..................................................`; +DROP TABLE `..................................................`; +DROP DATABASE `..................................................`; diff --git a/mysql-test/suite/innodb/t/page_id_innochecksum.test b/mysql-test/suite/innodb/t/page_id_innochecksum.test new file mode 100644 index 00000000000..5bd8a58fcd5 --- /dev/null +++ b/mysql-test/suite/innodb/t/page_id_innochecksum.test @@ -0,0 +1,52 @@ +--source include/have_innodb.inc +--echo # Set the environmental variables +let MYSQLD_BASEDIR= `SELECT @@basedir`; +let MYSQLD_DATADIR= `SELECT @@datadir`; +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; + +create table t1(f1 int not null)engine=innodb; +insert into t1 values(1), (2), (3); +let 
$resultlog=$MYSQLTEST_VARDIR/tmp/result.log; + +--source include/shutdown_mysqld.inc +--echo # Change the page offset +perl; +use strict; +use warnings; +use Fcntl qw(:DEFAULT :seek); +do "$ENV{MTR_SUITE_DIR}/../innodb/include/crc32.pl"; + +my $page_size = $ENV{INNODB_PAGE_SIZE}; + +sysopen(IBD_FILE, "$ENV{MYSQLD_DATADIR}/test/t1.ibd", O_RDWR) +|| die "Cannot open t1.ibd\n"; # call is parenthesized so that || tests sysopen's result, not the O_RDWR constant +sysread(IBD_FILE, $_, 38) || die "Cannot read t1.ibd\n"; +my $space = unpack("x[34]N", $_); +sysseek(IBD_FILE, $page_size * 3, SEEK_SET) || die "Cannot seek t1.ibd\n"; + +my $head = pack("Nx[18]", 4); # better to have a valid page number +my $body = chr(0) x ($page_size - 38 - 8); + +# Calculate innodb_checksum_algorithm=crc32 for the unencrypted page. +# The following bytes are excluded: +# bytes 0..3 (the checksum is stored there) +# bytes 26..37 (encryption key version, post-encryption checksum, tablespace id) +# bytes $page_size-8..$page_size-1 (checksum, LSB of FIL_PAGE_LSN) +my $polynomial = 0x82f63b78; # CRC-32C +my $ck = mycrc32($head, 0, $polynomial) ^ mycrc32($body, 0, $polynomial); + +my $page= pack("N",$ck).$head.pack("NNN",1,$ck,$space).$body.pack("Nx[4]",$ck); +die unless syswrite(IBD_FILE, $page, $page_size) == $page_size; +close IBD_FILE; +EOF + +--error 1 +exec $INNOCHECKSUM -C crc32 -l $resultlog $MYSQLD_DATADIR/test/t1.ibd; + +let SEARCH_FILE = $MYSQLTEST_VARDIR/tmp/result.log; +let SEARCH_PATTERN=page id mismatch; +--source include/search_pattern_in_file.inc + +--remove_file $resultlog +--source include/start_mysqld.inc +drop table t1; diff --git a/mysql-test/suite/innodb/t/purge.test b/mysql-test/suite/innodb/t/purge.test new file mode 100644 index 00000000000..63312e50fd8 --- /dev/null +++ b/mysql-test/suite/innodb/t/purge.test @@ -0,0 +1,117 @@ +--source include/have_innodb.inc +--source include/have_innodb_16k.inc + +SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency; +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; + +--echo # Bug #12429576 - Test an
assertion failure on purge. +CREATE TABLE t1_purge ( +A int, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t1_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); + +CREATE TABLE t2_purge ( +A int PRIMARY KEY, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, I blob, +J blob, K blob, L blob, +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t2_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), +REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); + +CREATE TABLE t3_purge ( +A int, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t3_purge SELECT * FROM t1_purge; + +CREATE TABLE t4_purge ( +A int PRIMARY KEY, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), I varchar(800), +J varchar(800), K varchar(800), L varchar(800), +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t4_purge SELECT * FROM t2_purge; + +# This would trigger the failure (Bug #12429576) +# if purge gets a chance to run before DROP TABLE t1_purge, .... +DELETE FROM t1_purge; +DELETE FROM t2_purge; +DELETE FROM t3_purge; +DELETE FROM t4_purge; +# We need to activate the purge thread before DROP TABLE. + +# Bug#12637786 - Assertion hit; ut_ad(dict_index_is_clust(index)); +# A secondary index tuple is found to be too long to fit into a page. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the assertion. 
+SET @r=REPEAT('a',500); +CREATE TABLE t12637786(a int, + v1 varchar(500), v2 varchar(500), v3 varchar(500), + v4 varchar(500), v5 varchar(500), v6 varchar(500), + v7 varchar(500), v8 varchar(500), v9 varchar(500), + v10 varchar(500), v11 varchar(500), v12 varchar(500), + v13 varchar(500), v14 varchar(500), v15 varchar(500), + v16 varchar(500), v17 varchar(500), v18 varchar(500) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +CREATE INDEX idx1 ON t12637786(a,v1); +INSERT INTO t12637786 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +UPDATE t12637786 SET a=1000; +DELETE FROM t12637786; +# We need to activate the purge thread before DROP TABLE +# to make sure it is able to clean up the old versions. + +--echo # Bug#12963823 - Test that the purge thread does not crash when +# the number of indexes has changed since the UNDO record was logged. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the crash. +CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, + i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) + ENGINE=innodb ROW_FORMAT=dynamic; +SET @r = REPEAT('a', 767); +INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); +CREATE INDEX ndx_a ON t12963823 (a(500)); +CREATE INDEX ndx_b ON t12963823 (b(500)); +CREATE INDEX ndx_c ON t12963823 (c(500)); +CREATE INDEX ndx_d ON t12963823 (d(500)); +CREATE INDEX ndx_e ON t12963823 (e(500)); +CREATE INDEX ndx_f ON t12963823 (f(500)); +CREATE INDEX ndx_k ON t12963823 (k(500)); +CREATE INDEX ndx_l ON t12963823 (l(500)); + +SET @r = REPEAT('b', 500); +UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; +UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; +UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; +UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; +ALTER TABLE t12963823 DROP INDEX ndx_a; +ALTER TABLE t12963823 DROP INDEX ndx_b; +CREATE INDEX ndx_g ON t12963823 (g(500)); +CREATE INDEX ndx_h ON t12963823 (h(500)); 
+CREATE INDEX ndx_i ON t12963823 (i(500)); +CREATE INDEX ndx_j ON t12963823 (j(500)); +CREATE INDEX ndx_m ON t12963823 (m(500)); +CREATE INDEX ndx_n ON t12963823 (n(500)); +CREATE INDEX ndx_o ON t12963823 (o(500)); +CREATE INDEX ndx_p ON t12963823 (p(500)); +SHOW CREATE TABLE t12963823; +# We need to activate the purge thread before DROP TABLE. + +-- source include/wait_all_purged.inc +DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge, t12637786, t12963823; +SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; diff --git a/mysql-test/suite/innodb/t/truncate_missing.test b/mysql-test/suite/innodb/t/truncate_missing.test index 832b94e3a19..c357f5bae72 100644 --- a/mysql-test/suite/innodb/t/truncate_missing.test +++ b/mysql-test/suite/innodb/t/truncate_missing.test @@ -15,8 +15,8 @@ let $datadir=`select @@datadir`; --remove_file $datadir/test/t.ibd --source include/start_mysqld.inc ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO SELECT * FROM t; ---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO TRUNCATE TABLE t; DROP TABLE t; diff --git a/mysql-test/suite/innodb_fts/r/fulltext2.result b/mysql-test/suite/innodb_fts/r/fulltext2.result index 9376d088244..8f8135b35cd 100644 --- a/mysql-test/suite/innodb_fts/r/fulltext2.result +++ b/mysql-test/suite/innodb_fts/r/fulltext2.result @@ -241,3 +241,33 @@ a „MySQL“ DROP TABLE t1; SET NAMES latin1; +CREATE TABLE t1 ( +FTS_DOC_ID BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, +id int(10) not null , +first_name varchar(50) NOT NULL, +last_name varchar(50) NOT NULL, +PRIMARY KEY (FTS_DOC_ID), +UNIQUE KEY idx_1 (first_name, last_name), +FULLTEXT KEY `idx_2` (first_name) +) ENGINE=InnoDB; +INSERT INTO t1 (id, first_name, last_name) VALUES +(10, 'Bart', 'Simpson'), +(11, 'Homer', 'Simpson'), +(12, 'Marge', 'Simpson'), +(13, 'Lisa', 'Simpson'), +(14, 'Maggie', 'Simpson'), +(15, 'Ned', 'Flanders'), +(16, 'Nelson', 'Muntz'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT 
fts_doc_id, first_name, last_name, MATCH(first_name) AGAINST('Homer' IN BOOLEAN MODE) AS score FROM t1; +fts_doc_id first_name last_name score +1 Bart Simpson 0 +2 Homer Simpson 0.7141907215118408 +4 Lisa Simpson 0 +5 Maggie Simpson 0 +3 Marge Simpson 0 +6 Ned Flanders 0 +7 Nelson Muntz 0 +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_fts/t/fulltext2.test b/mysql-test/suite/innodb_fts/t/fulltext2.test index 1c31bcd0319..4dd2c78827f 100644 --- a/mysql-test/suite/innodb_fts/t/fulltext2.test +++ b/mysql-test/suite/innodb_fts/t/fulltext2.test @@ -231,3 +231,29 @@ INSERT INTO t1 VALUES('„MySQL“'); SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE); DROP TABLE t1; SET NAMES latin1; + +# +# Bug #20597981 - WRONG RELEVANCE RANKING FOR FULL TEXT SEARCHES +# WHEN FTS_DOC_ID IS PRIMARY KEY +CREATE TABLE t1 ( + FTS_DOC_ID BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, + id int(10) not null , + first_name varchar(50) NOT NULL, + last_name varchar(50) NOT NULL, + PRIMARY KEY (FTS_DOC_ID), + UNIQUE KEY idx_1 (first_name, last_name), + FULLTEXT KEY `idx_2` (first_name) +) ENGINE=InnoDB; + +INSERT INTO t1 (id, first_name, last_name) VALUES +(10, 'Bart', 'Simpson'), +(11, 'Homer', 'Simpson'), +(12, 'Marge', 'Simpson'), +(13, 'Lisa', 'Simpson'), +(14, 'Maggie', 'Simpson'), +(15, 'Ned', 'Flanders'), +(16, 'Nelson', 'Muntz'); + +analyze table t1; +SELECT fts_doc_id, first_name, last_name, MATCH(first_name) AGAINST('Homer' IN BOOLEAN MODE) AS score FROM t1; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/4k.result b/mysql-test/suite/innodb_zip/r/4k.result deleted file mode 100644 index e7e0d65b487..00000000000 --- a/mysql-test/suite/innodb_zip/r/4k.result +++ /dev/null @@ -1,406 +0,0 @@ -SET default_storage_engine=InnoDB; -# Test 1) Show the page size from Information Schema -SELECT variable_value FROM information_schema.global_status -WHERE LOWER(variable_name) = 'innodb_page_size'; -variable_value -4096 -# Test 2) The number of buffer pool pages is 
dependent upon the page size. -SELECT variable_value FROM information_schema.global_status -WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; -variable_value -{checked_valid} -# Test 3) Query some information_shema tables that are dependent upon -# the page size. -SELECT t.name table_name, t.n_cols, t.flag table_flags, -i.name index_name, i.page_no root_page, i.type, -i.n_fields, i.merge_threshold -FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, -INFORMATION_SCHEMA.INNODB_SYS_INDEXES i -WHERE t.table_id = i.table_id -AND t.name LIKE 'mysql%' - ORDER BY t.name, i.index_id; -table_name n_cols table_flags index_name root_page type n_fields merge_threshold -mysql/innodb_index_stats 11 33 PRIMARY 3 3 4 50 -mysql/innodb_table_stats 9 33 PRIMARY 3 3 2 50 -mysql/transaction_registry 8 33 PRIMARY 3 3 1 50 -mysql/transaction_registry 8 33 commit_id 4 2 1 50 -mysql/transaction_registry 8 33 begin_timestamp 5 0 1 50 -mysql/transaction_registry 8 33 commit_timestamp 6 0 2 50 -CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; -CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; -CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; -CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; -SELECT t.name table_name, t.n_cols, t.flag table_flags, -i.name index_name, i.page_no root_page, i.type, -i.n_fields, i.merge_threshold -FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, -INFORMATION_SCHEMA.INNODB_SYS_INDEXES i -WHERE t.table_id = i.table_id -AND t.name LIKE 'test%' - ORDER BY t.name, i.name; -table_name n_cols table_flags index_name root_page type n_fields merge_threshold -test/t1 5 0 PRIMARY 3 3 1 50 -test/t2 5 1 PRIMARY 3 3 1 50 -test/t3 5 37 PRIMARY 3 3 1 50 -test/t4 5 33 PRIMARY 3 3 1 50 -=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === -Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path -test/t1 Single DEFAULT DEFAULT Compact or Redundant 
MYSQLD_DATADIR/test/t1.ibd -test/t2 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t2.ibd -test/t3 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t3.ibd -test/t4 Single DEFAULT DEFAULT Dynamic MYSQLD_DATADIR/test/t4.ibd -DROP TABLE t1, t2, t3, t4; -# Test 4) The maximum row size is dependent upon the page size. -# Redundant: 1979, Compact: 1982. -# Compressed: 1982, Dynamic: 1982. -# Each row format has its own amount of overhead that -# varies depending on number of fields and other overhead. -SET SESSION innodb_strict_mode = ON; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) -) ROW_FORMAT=redundant; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) -) ROW_FORMAT=redundant; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) -) ROW_FORMAT=compact; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) -) ROW_FORMAT=compact; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
-CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) -) ROW_FORMAT=compressed; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) -) ROW_FORMAT=compressed; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) -) ROW_FORMAT=dynamic; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) -) ROW_FORMAT=dynamic; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
-CREATE TABLE t1 (a varchar(64) character set utf8, -b varchar(64) character set utf8, -c varchar(64) character set utf8, -d varchar(64) character set utf8, -PRIMARY KEY (a,b,c,d)) -ENGINE=innodb; -DROP TABLE t1; -CREATE TABLE t1 (a varchar(64) character set utf8, -b varchar(64) character set utf8, -c varchar(64) character set utf8, -d varchar(65) character set utf8, -PRIMARY KEY (a,b,c,d)) -ENGINE=innodb; -ERROR 42000: Specified key was too long; max key length is 768 bytes -CREATE TABLE t1 (a varchar(64) character set utf8, -b varchar(64) character set utf8, -c varchar(64) character set utf8, -d varchar(64) character set utf8, -e varchar(64) character set utf8, -PRIMARY KEY (a), KEY (b,c,d,e)) -ENGINE=innodb; -DROP TABLE t1; -CREATE TABLE t1 (a varchar(64) character set utf8, -b varchar(64) character set utf8, -c varchar(64) character set utf8, -d varchar(64) character set utf8, -e varchar(65) character set utf8, -PRIMARY KEY (a), KEY (b,c,d,e)) -ENGINE=innodb; -ERROR 42000: Specified key was too long; max key length is 768 bytes -# Test 5) Make sure that KEY_BLOCK_SIZE=4, 2 & 1 are all -# accepted and that KEY_BLOCK_SIZE=16 & 8 are rejected -# in strict mode and converted to 4 in non-strict mode. -SET SESSION innodb_strict_mode = ON; -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. -Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. 
-Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=4 -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=2 -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=1 -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED -DROP TABLE t1; -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -Warnings: -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=16 -DROP TABLE t1; -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -Warnings: -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. 
-SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=8 -DROP TABLE t1; -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=4 -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=2 -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=1 -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED -DROP TABLE t1; -# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 -# are both rejected when innodb_file_per_table=OFF -SET SESSION innodb_strict_mode = ON; -SET GLOBAL innodb_file_per_table = OFF; -SHOW VARIABLES LIKE 'innodb_file_per_table'; -Variable_name Value -innodb_file_per_table OFF -CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; -ERROR HY000: Can't create table `test`.`t4` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. -Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. 
-Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; -ERROR HY000: Can't create table `test`.`t5` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. -Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. -Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -SET GLOBAL innodb_file_per_table = ON; -# Test 7) Not included here; 16k only -# Test 8) Test creating a table that could lead to undo log overflow. -CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, -h blob,i blob,j blob,k blob,l blob,m blob,n blob, -o blob,p blob,q blob,r blob,s blob,t blob,u blob) -ENGINE=InnoDB ROW_FORMAT=dynamic; -SET @a = repeat('a', 767); -SET @b = repeat('b', 767); -SET @c = repeat('c', 767); -SET @d = repeat('d', 767); -SET @e = repeat('e', 767); -INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); -UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, -k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; -CREATE INDEX t1a ON t1 (a(767)); -UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, -k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; -CREATE INDEX t1b ON t1 (b(767)); -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, -k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -ROLLBACK; -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; -UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, -n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -COMMIT; -CREATE INDEX t1c ON t1 (c(767)); -UPDATE t1 SET c=@e; -CREATE INDEX t1d ON t1 (d(767)); -BEGIN; -UPDATE t1 SET d=@e; -ROLLBACK; -CREATE INDEX t1e ON t1 (e(767)); -SHOW 
CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` blob DEFAULT NULL, - `b` blob DEFAULT NULL, - `c` blob DEFAULT NULL, - `d` blob DEFAULT NULL, - `e` blob DEFAULT NULL, - `f` blob DEFAULT NULL, - `g` blob DEFAULT NULL, - `h` blob DEFAULT NULL, - `i` blob DEFAULT NULL, - `j` blob DEFAULT NULL, - `k` blob DEFAULT NULL, - `l` blob DEFAULT NULL, - `m` blob DEFAULT NULL, - `n` blob DEFAULT NULL, - `o` blob DEFAULT NULL, - `p` blob DEFAULT NULL, - `q` blob DEFAULT NULL, - `r` blob DEFAULT NULL, - `s` blob DEFAULT NULL, - `t` blob DEFAULT NULL, - `u` blob DEFAULT NULL, - KEY `t1a` (`a`(767)), - KEY `t1b` (`b`(767)), - KEY `t1c` (`c`(767)), - KEY `t1d` (`d`(767)), - KEY `t1e` (`e`(767)) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC -DROP TABLE t1; -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1( -pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), -pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), -pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), -pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), -sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), -sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), -sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), -sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), -PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, -pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), -KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, -sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) -ROW_FORMAT=Redundant ENGINE=InnoDB; -SET @r = repeat('a', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 48); -INSERT INTO t1 
VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; -CREATE TABLE t1( -pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), -pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), -pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), -pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), -sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), -sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), -sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), -sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), -PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, -pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), -KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, -sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) -ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; -SET @r = repeat('a', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, 
-@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; -SET SESSION innodb_strict_mode = off; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -Warnings: -Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -DROP TABLE t1; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; -drop table t1; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(440))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -Warnings: -Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -DROP TABLE t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(438))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); -DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/8k.result b/mysql-test/suite/innodb_zip/r/8k.result deleted file mode 100644 index 76cc9d59aba..00000000000 --- a/mysql-test/suite/innodb_zip/r/8k.result +++ /dev/null @@ -1,437 +0,0 @@ -SET default_storage_engine=InnoDB; -# Test 1) Show the page size from Information Schema -SELECT variable_value FROM information_schema.global_status -WHERE LOWER(variable_name) = 'innodb_page_size'; -variable_value -8192 -# Test 2) The number of buffer pool pages is dependent upon the page size. 
-SELECT variable_value FROM information_schema.global_status -WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; -variable_value -{checked_valid} -# Test 3) Query some information_shema tables that are dependent upon -# the page size. -SELECT t.name table_name, t.n_cols, t.flag table_flags, -i.name index_name, i.page_no root_page, i.type, -i.n_fields, i.merge_threshold -FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, -INFORMATION_SCHEMA.INNODB_SYS_INDEXES i -WHERE t.table_id = i.table_id -AND t.name LIKE 'mysql%' - ORDER BY t.name, i.index_id; -table_name n_cols table_flags index_name root_page type n_fields merge_threshold -mysql/innodb_index_stats 11 33 PRIMARY 3 3 4 50 -mysql/innodb_table_stats 9 33 PRIMARY 3 3 2 50 -mysql/transaction_registry 8 33 PRIMARY 3 3 1 50 -mysql/transaction_registry 8 33 commit_id 4 2 1 50 -mysql/transaction_registry 8 33 begin_timestamp 5 0 1 50 -mysql/transaction_registry 8 33 commit_timestamp 6 0 2 50 -CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; -CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; -CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; -CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; -SELECT t.name table_name, t.n_cols, t.flag table_flags, -i.name index_name, i.page_no root_page, i.type, -i.n_fields, i.merge_threshold -FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, -INFORMATION_SCHEMA.INNODB_SYS_INDEXES i -WHERE t.table_id = i.table_id -AND t.name LIKE 'test%' - ORDER BY t.name, i.name; -table_name n_cols table_flags index_name root_page type n_fields merge_threshold -test/t1 5 0 PRIMARY 3 3 1 50 -test/t2 5 1 PRIMARY 3 3 1 50 -test/t3 5 39 PRIMARY 3 3 1 50 -test/t4 5 33 PRIMARY 3 3 1 50 -=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === -Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path -test/t1 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t1.ibd -test/t2 Single 
DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t2.ibd -test/t3 Single DEFAULT 4096 Compressed MYSQLD_DATADIR/test/t3.ibd -test/t4 Single DEFAULT DEFAULT Dynamic MYSQLD_DATADIR/test/t4.ibd -DROP TABLE t1, t2, t3, t4; -# Test 4) The maximum row size is dependent upon the page size. -# Redundant: 4027, Compact: 4030. -# Compressed: 4030, Dynamic: 4030. -# Each row format has its own amount of overhead that -# varies depending on number of fields and other overhead. -SET SESSION innodb_strict_mode = ON; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) -) ROW_FORMAT=redundant; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) -) ROW_FORMAT=redundant; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
-CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) -) ROW_FORMAT=compact; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) -) ROW_FORMAT=compact; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) -) ROW_FORMAT=compressed; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) -) ROW_FORMAT=compressed; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
-CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) -) ROW_FORMAT=dynamic; -DROP TABLE t1; -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) -) ROW_FORMAT=dynamic; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -CREATE TABLE t1 (a varchar(128) character set utf8, -b varchar(128) character set utf8, -c varchar(128) character set utf8, -d varchar(128) character set utf8, -PRIMARY KEY (a,b,c,d)) -ENGINE=innodb; -DROP TABLE t1; -CREATE TABLE t1 (a varchar(128) character set utf8, -b varchar(128) character set utf8, -c varchar(128) character set utf8, -d varchar(129) character set utf8, -PRIMARY KEY (a,b,c,d)) -ENGINE=innodb; -ERROR 42000: Specified key was too long; max key length is 1536 bytes -CREATE TABLE t1 (a varchar(128) character set utf8, -b varchar(128) character set utf8, -c varchar(128) character set utf8, -d varchar(128) character set utf8, -e varchar(128) character set utf8, -PRIMARY KEY (a), KEY (b,c,d,e)) -ENGINE=innodb; -DROP TABLE t1; -CREATE TABLE t1 (a varchar(128) character set utf8, -b varchar(128) character set utf8, -c varchar(128) character set utf8, -d varchar(128) character set utf8, -e varchar(129) character set utf8, -PRIMARY KEY (a), KEY (b,c,d,e)) -ENGINE=innodb; -ERROR 42000: Specified key was too long; max key length is 1536 bytes -# Test 5) Make sure that KEY_BLOCK_SIZE=8, 4, 2 & 1 are all 
-# accepted and that KEY_BLOCK_SIZE=16 is rejected in -# strict mode and converted to 8 in non-strict mode. -SET SESSION innodb_strict_mode = ON; -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. -Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=8 -ALTER TABLE t1 KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=4 -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=2 -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=1 -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED -DROP TABLE t1; -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED 
KEY_BLOCK_SIZE=16; -Warnings: -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=16 -DROP TABLE t1; -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=8 -DROP TABLE t1; -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=4 -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=2 -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED key_block_size=1 -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -Level Code Message -SELECT table_name, row_format, create_options -FROM information_schema.tables WHERE table_name = 't1'; -table_name row_format create_options -t1 Compressed row_format=COMPRESSED -DROP TABLE t1; -# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 -# are rejected when innodb_file_per_table=OFF -SET SESSION innodb_strict_mode = ON; -SET GLOBAL innodb_file_per_table = OFF; -SHOW VARIABLES LIKE 
'innodb_file_per_table'; -Variable_name Value -innodb_file_per_table OFF -CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; -ERROR HY000: Can't create table `test`.`t4` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. -Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; -ERROR HY000: Can't create table `test`.`t5` (errno: 140 "Wrong create options") -SHOW WARNINGS; -Level Code Message -Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. -Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. -Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") -Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB -SET GLOBAL innodb_file_per_table = ON; -# Test 7) Not included here; 16k only -# Test 8) Test creating a table that could lead to undo log overflow. 
-CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, -h blob,i blob,j blob,k blob,l blob,m blob,n blob, -o blob,p blob,q blob,r blob,s blob,t blob,u blob) -ENGINE=InnoDB ROW_FORMAT=dynamic; -SET @a = repeat('a', 767); -SET @b = repeat('b', 767); -SET @c = repeat('c', 767); -SET @d = repeat('d', 767); -SET @e = repeat('e', 767); -INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); -UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, -k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; -CREATE INDEX t1a ON t1 (a(767)); -CREATE INDEX t1b ON t1 (b(767)); -UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, -k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; -CREATE INDEX t1c ON t1 (c(767)); -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, -k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -ROLLBACK; -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; -UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, -n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -COMMIT; -CREATE INDEX t1d ON t1 (d(767)); -UPDATE t1 SET d=@e; -CREATE INDEX t1e ON t1 (e(767)); -UPDATE t1 SET e=@e; -CREATE INDEX t1f ON t1 (f(767)); -UPDATE t1 SET f=@e; -CREATE INDEX t1g ON t1 (g(767)); -UPDATE t1 SET g=@e; -CREATE INDEX t1h ON t1 (h(767)); -UPDATE t1 SET h=@e; -CREATE INDEX t1i ON t1 (i(767)); -UPDATE t1 SET i=@e; -CREATE INDEX t1k ON t1 (j(767)); -CREATE INDEX t1j ON t1 (j(500)); -BEGIN; -UPDATE t1 SET j=@e; -ROLLBACK; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` blob DEFAULT NULL, - `b` blob DEFAULT NULL, - `c` blob DEFAULT NULL, - `d` blob DEFAULT NULL, - `e` blob DEFAULT NULL, - `f` blob DEFAULT NULL, - `g` blob DEFAULT NULL, - `h` blob DEFAULT NULL, - `i` blob DEFAULT NULL, - `j` blob DEFAULT NULL, - `k` blob DEFAULT NULL, - `l` blob DEFAULT NULL, - `m` blob DEFAULT NULL, - `n` blob DEFAULT NULL, - `o` blob DEFAULT NULL, - `p` blob DEFAULT NULL, - `q` blob 
DEFAULT NULL, - `r` blob DEFAULT NULL, - `s` blob DEFAULT NULL, - `t` blob DEFAULT NULL, - `u` blob DEFAULT NULL, - KEY `t1a` (`a`(767)), - KEY `t1b` (`b`(767)), - KEY `t1c` (`c`(767)), - KEY `t1d` (`d`(767)), - KEY `t1e` (`e`(767)), - KEY `t1f` (`f`(767)), - KEY `t1g` (`g`(767)), - KEY `t1h` (`h`(767)), - KEY `t1i` (`i`(767)), - KEY `t1k` (`j`(767)), - KEY `t1j` (`j`(500)) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC -DROP TABLE t1; -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1( -pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), -pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), -pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), -pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), -sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), -sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), -sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), -sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), -PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, -pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), -KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, -sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) -ROW_FORMAT=Redundant ENGINE=InnoDB; -SET @r = repeat('a', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 96); -INSERT INTO t1 
VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; -CREATE TABLE t1( -pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), -pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), -pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), -pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), -sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), -sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), -sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), -sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), -PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, -pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), -KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, -sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) -ROW_FORMAT=Compressed KEY_BLOCK_SIZE=8 ENGINE=InnoDB; -SET @r = repeat('a', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, -@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; -SET SESSION innodb_strict_mode = off; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -Warnings: -Warning 139 Row size too 
large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -DROP TABLE t1; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; -Warnings: -Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -DROP TABLE t1; -CREATE TABLE t1( -c text NOT NULL, d text NOT NULL, -PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(440))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -Warnings: -Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. -DROP TABLE t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(438))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); -DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/page_size,4k.rdiff b/mysql-test/suite/innodb_zip/r/page_size,4k.rdiff new file mode 100644 index 00000000000..637c33416cd --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/page_size,4k.rdiff @@ -0,0 +1,469 @@ +--- page_size.result ++++ page_size,4k.result +@@ -3,7 +3,7 @@ + SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_page_size'; + variable_value +-16384 ++4096 + # Test 3) Query some information_shema tables that are dependent upon + # the page size. 
+ SELECT t.name table_name, t.n_cols, t.flag table_flags, +@@ -36,13 +36,13 @@ + table_name n_cols table_flags index_name root_page type n_fields merge_threshold + test/t1 5 0 PRIMARY 3 3 1 50 + test/t2 5 1 PRIMARY 3 3 1 50 +-test/t3 5 41 PRIMARY 3 3 1 50 ++test/t3 5 37 PRIMARY 3 3 1 50 + test/t4 5 33 PRIMARY 3 3 1 50 + === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === + Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path + test/t1 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t1.ibd + test/t2 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t2.ibd +-test/t3 Single DEFAULT 8192 Compressed MYSQLD_DATADIR/test/t3.ibd ++test/t3 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t3.ibd + test/t4 Single DEFAULT DEFAULT Dynamic MYSQLD_DATADIR/test/t4.ibd + DROP TABLE t1, t2, t3, t4; + # Test 4) The maximum row size is dependent upon the page size. +@@ -51,141 +51,90 @@ + SET SESSION innodb_strict_mode = ON; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(211) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) + ) ROW_FORMAT=redundant; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 
char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(212) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) + ) ROW_FORMAT=redundant; +-ERROR 42000: Row size too large (> 8123). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) + ) ROW_FORMAT=compact; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 
char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) + ) ROW_FORMAT=compact; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(157) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) + ) ROW_FORMAT=compressed; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 
char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(160) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) + ) ROW_FORMAT=compressed; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) + ) ROW_FORMAT=dynamic; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +-c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +-c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 
char(200), c39 char(250), c40 char(247) ++c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) + ) ROW_FORMAT=dynamic; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(4) character set utf8, +-PRIMARY KEY (a,b,c,d,e)) ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++CREATE TABLE t1 (a varchar(98) character set utf8, ++b varchar(98) character set utf8, ++c varchar(98) character set utf8, ++d varchar(97) character set utf8, ++PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; + DROP TABLE t1; +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(5) character set utf8, +-PRIMARY KEY (a,b,c,d,e)) ++CREATE TABLE t1 (a varchar(98) character set utf8, ++b varchar(98) character set utf8, ++c varchar(98) character set utf8, ++d varchar(98) character set utf8, ++PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +-ERROR 42000: Specified key was too long; max key length is 3072 bytes +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(255) character set utf8, +-f varchar(4) character set utf8, +-PRIMARY KEY (a), KEY (b,c,d,e,f)) ++ERROR 42000: Specified key was too long; max key length is 1173 bytes ++CREATE TABLE t1 (a varchar(98) character set utf8, ++b varchar(98) character set utf8, ++c varchar(98) character set utf8, ++d varchar(98) character set utf8, ++e varchar(97) character set utf8, ++PRIMARY KEY (a), KEY (b,c,d,e)) + 
ENGINE=innodb; + DROP TABLE t1; +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(255) character set utf8, +-f varchar(5) character set utf8, +-PRIMARY KEY (a), KEY (b,c,d,e,f)) ++CREATE TABLE t1 (a varchar(98) character set utf8, ++b varchar(98) character set utf8, ++c varchar(98) character set utf8, ++d varchar(98) character set utf8, ++e varchar(98) character set utf8, ++PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +-ERROR 42000: Specified key was too long; max key length is 3072 bytes ++ERROR 42000: Specified key was too long; max key length is 1173 bytes + # Test 5) KEY_BLOCK_SIZE validation + CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ++ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message +-SELECT table_name, row_format, create_options +-FROM information_schema.tables WHERE table_name = 't1'; +-table_name row_format create_options +-t1 Compressed row_format=COMPRESSED key_block_size=16 +-ALTER TABLE t1 KEY_BLOCK_SIZE=8; ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. ++Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") ++Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB ++CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; ++ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message +-SELECT table_name, row_format, create_options +-FROM information_schema.tables WHERE table_name = 't1'; +-table_name row_format create_options +-t1 Compressed row_format=COMPRESSED key_block_size=8 ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. 
++Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") ++Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB ++CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; + ALTER TABLE t1 KEY_BLOCK_SIZE=4; + SHOW WARNINGS; + Level Code Message +@@ -217,15 +166,21 @@ + DROP TABLE t1; + SET SESSION innodb_strict_mode = OFF; + CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ++Warnings: ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. + SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + table_name row_format create_options + t1 Compressed row_format=COMPRESSED key_block_size=16 + ALTER TABLE t1 KEY_BLOCK_SIZE=8; ++Warnings: ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. + SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + table_name row_format create_options +@@ -269,6 +224,7 @@ + ERROR HY000: Can't create table `test`.`t4` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. + Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. + Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") + Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +@@ -276,105 +232,11 @@ + ERROR HY000: Can't create table `test`.`t5` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. + Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. 
+ Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") + Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB + SET GLOBAL innodb_file_per_table = ON; +-# Test 7) This series of tests were moved from innodb-index to here +-# because the second alter table t1 assumes a 16k page size. +-# Moving the test allows the rest of innodb-index to be run on all +-# page sizes. The previously disabled portions of this test were +-# moved as well. +-CREATE TABLE t2(d varchar(17) PRIMARY KEY) ENGINE=innodb DEFAULT CHARSET=utf8; +-CREATE TABLE t3(a int PRIMARY KEY) ENGINE=innodb; +-INSERT INTO t3 VALUES (22),(44),(33),(55),(66); +-INSERT INTO t2 VALUES ('jejdkrun87'),('adfd72nh9k'), +-('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik'); +-CREATE TABLE t1(a int, b blob, c text, d text NOT NULL) +-ENGINE=innodb DEFAULT CHARSET=utf8 STATS_PERSISTENT=0; +-INSERT INTO t1 +-SELECT a,LEFT(REPEAT(d,100*a),65535),REPEAT(d,20*a),d FROM t2,t3 order by a, d; +-DROP TABLE t2, t3; +-SELECT COUNT(*) FROM t1 WHERE a=44; +-COUNT(*) +-5 +-SELECT a, +-LENGTH(b),b=LEFT(REPEAT(d,100*a),65535),LENGTH(c),c=REPEAT(d,20*a),d FROM t1 +-ORDER BY 1, 2, 3, 4, 5, 6; +-a LENGTH(b) b=LEFT(REPEAT(d,100*a),65535) LENGTH(c) c=REPEAT(d,20*a) d +-22 22000 1 4400 1 adfd72nh9k +-22 22000 1 4400 1 jejdkrun87 +-22 26400 1 5280 1 adfdpplkeock +-22 28600 1 5720 1 adfdijnmnb78k +-22 35200 1 7040 1 adfdijn0loKNHJik +-33 33000 1 6600 1 adfd72nh9k +-33 33000 1 6600 1 jejdkrun87 +-33 39600 1 7920 1 adfdpplkeock +-33 42900 1 8580 1 adfdijnmnb78k +-33 52800 1 10560 1 adfdijn0loKNHJik +-44 44000 1 8800 1 adfd72nh9k +-44 44000 1 8800 1 jejdkrun87 +-44 52800 1 10560 1 adfdpplkeock +-44 57200 1 11440 1 adfdijnmnb78k +-44 65535 1 14080 1 adfdijn0loKNHJik +-55 55000 1 11000 1 adfd72nh9k +-55 55000 1 11000 1 jejdkrun87 +-55 65535 1 13200 1 adfdpplkeock +-55 65535 1 14300 1 adfdijnmnb78k +-55 65535 1 17600 1 adfdijn0loKNHJik +-66 65535 1 13200 1 adfd72nh9k +-66 65535 1 13200 1 
jejdkrun87 +-66 65535 1 15840 1 adfdpplkeock +-66 65535 1 17160 1 adfdijnmnb78k +-66 65535 1 21120 1 adfdijn0loKNHJik +-ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +-ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +-DELETE FROM t1 WHERE d='null'; +-ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +-ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +-DELETE FROM t1 WHERE a%2; +-CHECK TABLE t1; +-Table Op Msg_type Msg_text +-test.t1 check status OK +-ALTER TABLE t1 ADD PRIMARY KEY (a,b(255),c(255)), ADD KEY (b(767)); +-SELECT COUNT(*) FROM t1 WHERE a=44; +-COUNT(*) +-5 +-SELECT a, +-LENGTH(b), b=LEFT(REPEAT(d,100*a), 65535),LENGTH(c), c=REPEAT(d,20*a), d FROM t1; +-a LENGTH(b) b=LEFT(REPEAT(d,100*a), 65535) LENGTH(c) c=REPEAT(d,20*a) d +-22 22000 1 4400 1 adfd72nh9k +-22 35200 1 7040 1 adfdijn0loKNHJik +-22 28600 1 5720 1 adfdijnmnb78k +-22 26400 1 5280 1 adfdpplkeock +-22 22000 1 4400 1 jejdkrun87 +-44 44000 1 8800 1 adfd72nh9k +-44 65535 1 14080 1 adfdijn0loKNHJik +-44 57200 1 11440 1 adfdijnmnb78k +-44 52800 1 10560 1 adfdpplkeock +-44 44000 1 8800 1 jejdkrun87 +-66 65535 1 13200 1 adfd72nh9k +-66 65535 1 21120 1 adfdijn0loKNHJik +-66 65535 1 17160 1 adfdijnmnb78k +-66 65535 1 15840 1 adfdpplkeock +-66 65535 1 13200 1 jejdkrun87 +-SHOW CREATE TABLE t1; +-Table Create Table +-t1 CREATE TABLE `t1` ( +- `a` int(11) NOT NULL, +- `b` blob NOT NULL, +- `c` text NOT NULL, +- `d` text NOT NULL, +- PRIMARY KEY (`a`,`b`(255),`c`(255)), +- KEY `b` (`b`(767)) +-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 +-CHECK TABLE t1; +-Table Op Msg_type Msg_text +-test.t1 check status OK +-EXPLAIN SELECT * FROM t1 WHERE b LIKE 'adfd%'; +-id select_type table type possible_keys key key_len ref rows Extra +-1 SIMPLE t1 range b b 769 NULL 12 Using where +-DROP TABLE t1; + # Test 8) Test creating a table that could lead to undo log overflow. 
+ CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, + h blob,i blob,j blob,k blob,l blob,m blob,n blob, +@@ -389,10 +251,6 @@ + UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, + k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; + CREATE INDEX t1a ON t1 (a(767)); +-CREATE INDEX t1b ON t1 (b(767)); +-CREATE INDEX t1c ON t1 (c(767)); +-CREATE INDEX t1d ON t1 (d(767)); +-CREATE INDEX t1e ON t1 (e(767)); + UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, + k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; + CREATE INDEX t1f ON t1 (f(767)); +@@ -407,37 +265,15 @@ + COMMIT; + CREATE INDEX t1g ON t1 (g(767)); + UPDATE t1 SET g=@e; +-CREATE INDEX t1h ON t1 (h(767)); +-UPDATE t1 SET h=@e; +-CREATE INDEX t1i ON t1 (i(767)); +-UPDATE t1 SET i=@e; +-CREATE INDEX t1j ON t1 (j(767)); +-UPDATE t1 SET j=@e; +-CREATE INDEX t1k ON t1 (k(767)); +-UPDATE t1 SET k=@e; +-CREATE INDEX t1l ON t1 (l(767)); +-UPDATE t1 SET l=@e; +-CREATE INDEX t1m ON t1 (m(767)); +-UPDATE t1 SET m=@e; +-CREATE INDEX t1n ON t1 (n(767)); +-UPDATE t1 SET n=@e; +-CREATE INDEX t1o ON t1 (o(767)); +-UPDATE t1 SET o=@e; +-CREATE INDEX t1p ON t1 (p(767)); +-UPDATE t1 SET p=@e; +-CREATE INDEX t1q ON t1 (q(767)); +-UPDATE t1 SET q=@e; +-CREATE INDEX t1r ON t1 (r(767)); +-UPDATE t1 SET r=@e; +-CREATE INDEX t1s ON t1 (s(767)); +-UPDATE t1 SET s=@e; + CREATE INDEX t1t ON t1 (t(767)); + BEGIN; + UPDATE t1 SET t=@e; + ROLLBACK; + CREATE INDEX t1u ON t1 (u(767)); +-CREATE INDEX t1ut ON t1 (u(767), t(767)); +-CREATE INDEX t1st ON t1 (s(767), t(767)); ++CREATE INDEX t1ut ON t1 (u(767)); ++Warnings: ++Note 1831 Duplicate index `t1ut`. 
This is deprecated and will be disallowed in a future release ++CREATE INDEX t1st ON t1 (s(767)); + SHOW CREATE TABLE t1; + Table Create Table + t1 CREATE TABLE `t1` ( +@@ -463,28 +299,12 @@ + `t` blob DEFAULT NULL, + `u` blob DEFAULT NULL, + KEY `t1a` (`a`(767)), +- KEY `t1b` (`b`(767)), +- KEY `t1c` (`c`(767)), +- KEY `t1d` (`d`(767)), +- KEY `t1e` (`e`(767)), + KEY `t1f` (`f`(767)), + KEY `t1g` (`g`(767)), +- KEY `t1h` (`h`(767)), +- KEY `t1i` (`i`(767)), +- KEY `t1j` (`j`(767)), +- KEY `t1k` (`k`(767)), +- KEY `t1l` (`l`(767)), +- KEY `t1m` (`m`(767)), +- KEY `t1n` (`n`(767)), +- KEY `t1o` (`o`(767)), +- KEY `t1p` (`p`(767)), +- KEY `t1q` (`q`(767)), +- KEY `t1r` (`r`(767)), +- KEY `t1s` (`s`(767)), + KEY `t1t` (`t`(767)), + KEY `t1u` (`u`(767)), +- KEY `t1ut` (`u`(767),`t`(767)), +- KEY `t1st` (`s`(767),`t`(767)) ++ KEY `t1ut` (`u`(767)), ++ KEY `t1st` (`s`(767)) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC + DROP TABLE t1; + # Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE +@@ -565,27 +385,25 @@ + DROP TABLE t1; + CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, +-PRIMARY KEY (c(767),d(767))) ++PRIMARY KEY (c(767))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; + Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++Warning 139 Row size too large (> 1982). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + DROP TABLE t1; + CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, +-PRIMARY KEY (c(767),d(767))) ++PRIMARY KEY (c(767))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +-Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+ DROP TABLE t1; + CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, +-PRIMARY KEY (c(767),d(767))) ++PRIMARY KEY (c(767))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; + DROP TABLE t1; + CREATE TABLE t1(c text, PRIMARY KEY (c(440))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; + Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++Warning 139 Row size too large (> 1982). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + DROP TABLE t1; + CREATE TABLE t1(c text, PRIMARY KEY (c(438))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; diff --git a/mysql-test/suite/innodb_zip/r/page_size,8k.rdiff b/mysql-test/suite/innodb_zip/r/page_size,8k.rdiff new file mode 100644 index 00000000000..a8e69e9b461 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/page_size,8k.rdiff @@ -0,0 +1,415 @@ +--- page_size.result ++++ page_size,8k.result +@@ -3,7 +3,7 @@ + SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_page_size'; + variable_value +-16384 ++8192 + # Test 3) Query some information_shema tables that are dependent upon + # the page size. 
+ SELECT t.name table_name, t.n_cols, t.flag table_flags, +@@ -36,13 +36,13 @@ + table_name n_cols table_flags index_name root_page type n_fields merge_threshold + test/t1 5 0 PRIMARY 3 3 1 50 + test/t2 5 1 PRIMARY 3 3 1 50 +-test/t3 5 41 PRIMARY 3 3 1 50 ++test/t3 5 39 PRIMARY 3 3 1 50 + test/t4 5 33 PRIMARY 3 3 1 50 + === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === + Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path + test/t1 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t1.ibd + test/t2 Single DEFAULT DEFAULT Compact or Redundant MYSQLD_DATADIR/test/t2.ibd +-test/t3 Single DEFAULT 8192 Compressed MYSQLD_DATADIR/test/t3.ibd ++test/t3 Single DEFAULT 4096 Compressed MYSQLD_DATADIR/test/t3.ibd + test/t4 Single DEFAULT DEFAULT Dynamic MYSQLD_DATADIR/test/t4.ibd + DROP TABLE t1, t2, t3, t4; + # Test 4) The maximum row size is dependent upon the page size. +@@ -53,133 +53,97 @@ + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(211) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) + ) ROW_FORMAT=redundant; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 
char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(212) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) + ) ROW_FORMAT=redundant; +-ERROR 42000: Row size too large (> 8123). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) + ) ROW_FORMAT=compact; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 
char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) + ) ROW_FORMAT=compact; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(157) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) + ) ROW_FORMAT=compressed; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 
char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(160) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) + ) ROW_FORMAT=compressed; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) ++c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) + ) ROW_FORMAT=dynamic; + DROP TABLE t1; + CREATE TABLE t1 ( + c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), + c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), + c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +-c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +-c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +-c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +-c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +-c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) ++c16 char(200), c17 
char(200), c18 char(200), c19 char(200), c20 char(203) + ) ROW_FORMAT=dynamic; +-ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(4) character set utf8, +-PRIMARY KEY (a,b,c,d,e)) ++ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++CREATE TABLE t1 (a varchar(128) character set utf8, ++b varchar(128) character set utf8, ++c varchar(128) character set utf8, ++d varchar(128) character set utf8, ++PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; + DROP TABLE t1; +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(5) character set utf8, +-PRIMARY KEY (a,b,c,d,e)) ++CREATE TABLE t1 (a varchar(128) character set utf8, ++b varchar(128) character set utf8, ++c varchar(128) character set utf8, ++d varchar(129) character set utf8, ++PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +-ERROR 42000: Specified key was too long; max key length is 3072 bytes +-CREATE TABLE t1 (a varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(255) character set utf8, +-f varchar(4) character set utf8, +-PRIMARY KEY (a), KEY (b,c,d,e,f)) ++ERROR 42000: Specified key was too long; max key length is 1536 bytes ++CREATE TABLE t1 (a varchar(128) character set utf8, ++b varchar(128) character set utf8, ++c varchar(128) character set utf8, ++d varchar(128) character set utf8, ++e varchar(128) character set utf8, ++PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; + DROP TABLE t1; +-CREATE TABLE t1 (a 
varchar(255) character set utf8, +-b varchar(255) character set utf8, +-c varchar(255) character set utf8, +-d varchar(255) character set utf8, +-e varchar(255) character set utf8, +-f varchar(5) character set utf8, +-PRIMARY KEY (a), KEY (b,c,d,e,f)) ++CREATE TABLE t1 (a varchar(128) character set utf8, ++b varchar(128) character set utf8, ++c varchar(128) character set utf8, ++d varchar(128) character set utf8, ++e varchar(129) character set utf8, ++PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +-ERROR 42000: Specified key was too long; max key length is 3072 bytes ++ERROR 42000: Specified key was too long; max key length is 1536 bytes + # Test 5) KEY_BLOCK_SIZE validation +-CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ++CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ++ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message +-SELECT table_name, row_format, create_options +-FROM information_schema.tables WHERE table_name = 't1'; +-table_name row_format create_options +-t1 Compressed row_format=COMPRESSED key_block_size=16 +-ALTER TABLE t1 KEY_BLOCK_SIZE=8; ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. ++Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") ++Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB ++CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + SHOW WARNINGS; + Level Code Message + SELECT table_name, row_format, create_options +@@ -217,8 +181,11 @@ + DROP TABLE t1; + SET SESSION innodb_strict_mode = OFF; + CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ++Warnings: ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. 
+ SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + table_name row_format create_options +@@ -276,105 +243,11 @@ + ERROR HY000: Can't create table `test`.`t5` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message ++Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. + Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. + Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") + Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB + SET GLOBAL innodb_file_per_table = ON; +-# Test 7) This series of tests were moved from innodb-index to here +-# because the second alter table t1 assumes a 16k page size. +-# Moving the test allows the rest of innodb-index to be run on all +-# page sizes. The previously disabled portions of this test were +-# moved as well. +-CREATE TABLE t2(d varchar(17) PRIMARY KEY) ENGINE=innodb DEFAULT CHARSET=utf8; +-CREATE TABLE t3(a int PRIMARY KEY) ENGINE=innodb; +-INSERT INTO t3 VALUES (22),(44),(33),(55),(66); +-INSERT INTO t2 VALUES ('jejdkrun87'),('adfd72nh9k'), +-('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik'); +-CREATE TABLE t1(a int, b blob, c text, d text NOT NULL) +-ENGINE=innodb DEFAULT CHARSET=utf8 STATS_PERSISTENT=0; +-INSERT INTO t1 +-SELECT a,LEFT(REPEAT(d,100*a),65535),REPEAT(d,20*a),d FROM t2,t3 order by a, d; +-DROP TABLE t2, t3; +-SELECT COUNT(*) FROM t1 WHERE a=44; +-COUNT(*) +-5 +-SELECT a, +-LENGTH(b),b=LEFT(REPEAT(d,100*a),65535),LENGTH(c),c=REPEAT(d,20*a),d FROM t1 +-ORDER BY 1, 2, 3, 4, 5, 6; +-a LENGTH(b) b=LEFT(REPEAT(d,100*a),65535) LENGTH(c) c=REPEAT(d,20*a) d +-22 22000 1 4400 1 adfd72nh9k +-22 22000 1 4400 1 jejdkrun87 +-22 26400 1 5280 1 adfdpplkeock +-22 28600 1 5720 1 adfdijnmnb78k +-22 35200 1 7040 1 adfdijn0loKNHJik +-33 33000 1 6600 1 adfd72nh9k +-33 33000 1 6600 1 jejdkrun87 +-33 39600 1 7920 1 adfdpplkeock +-33 42900 1 8580 1 adfdijnmnb78k +-33 52800 1 
10560 1 adfdijn0loKNHJik +-44 44000 1 8800 1 adfd72nh9k +-44 44000 1 8800 1 jejdkrun87 +-44 52800 1 10560 1 adfdpplkeock +-44 57200 1 11440 1 adfdijnmnb78k +-44 65535 1 14080 1 adfdijn0loKNHJik +-55 55000 1 11000 1 adfd72nh9k +-55 55000 1 11000 1 jejdkrun87 +-55 65535 1 13200 1 adfdpplkeock +-55 65535 1 14300 1 adfdijnmnb78k +-55 65535 1 17600 1 adfdijn0loKNHJik +-66 65535 1 13200 1 adfd72nh9k +-66 65535 1 13200 1 jejdkrun87 +-66 65535 1 15840 1 adfdpplkeock +-66 65535 1 17160 1 adfdijnmnb78k +-66 65535 1 21120 1 adfdijn0loKNHJik +-ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +-ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +-DELETE FROM t1 WHERE d='null'; +-ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +-ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +-DELETE FROM t1 WHERE a%2; +-CHECK TABLE t1; +-Table Op Msg_type Msg_text +-test.t1 check status OK +-ALTER TABLE t1 ADD PRIMARY KEY (a,b(255),c(255)), ADD KEY (b(767)); +-SELECT COUNT(*) FROM t1 WHERE a=44; +-COUNT(*) +-5 +-SELECT a, +-LENGTH(b), b=LEFT(REPEAT(d,100*a), 65535),LENGTH(c), c=REPEAT(d,20*a), d FROM t1; +-a LENGTH(b) b=LEFT(REPEAT(d,100*a), 65535) LENGTH(c) c=REPEAT(d,20*a) d +-22 22000 1 4400 1 adfd72nh9k +-22 35200 1 7040 1 adfdijn0loKNHJik +-22 28600 1 5720 1 adfdijnmnb78k +-22 26400 1 5280 1 adfdpplkeock +-22 22000 1 4400 1 jejdkrun87 +-44 44000 1 8800 1 adfd72nh9k +-44 65535 1 14080 1 adfdijn0loKNHJik +-44 57200 1 11440 1 adfdijnmnb78k +-44 52800 1 10560 1 adfdpplkeock +-44 44000 1 8800 1 jejdkrun87 +-66 65535 1 13200 1 adfd72nh9k +-66 65535 1 21120 1 adfdijn0loKNHJik +-66 65535 1 17160 1 adfdijnmnb78k +-66 65535 1 15840 1 adfdpplkeock +-66 65535 1 13200 1 jejdkrun87 +-SHOW CREATE TABLE t1; +-Table Create Table +-t1 CREATE TABLE `t1` ( +- `a` int(11) NOT NULL, +- `b` blob NOT NULL, +- `c` text NOT NULL, +- `d` text NOT NULL, +- PRIMARY KEY (`a`,`b`(255),`c`(255)), +- KEY `b` (`b`(767)) +-) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 +-CHECK TABLE t1; +-Table Op 
Msg_type Msg_text +-test.t1 check status OK +-EXPLAIN SELECT * FROM t1 WHERE b LIKE 'adfd%'; +-id select_type table type possible_keys key key_len ref rows Extra +-1 SIMPLE t1 range b b 769 NULL 12 Using where +-DROP TABLE t1; + # Test 8) Test creating a table that could lead to undo log overflow. + CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, + h blob,i blob,j blob,k blob,l blob,m blob,n blob, +@@ -389,10 +262,6 @@ + UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, + k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; + CREATE INDEX t1a ON t1 (a(767)); +-CREATE INDEX t1b ON t1 (b(767)); +-CREATE INDEX t1c ON t1 (c(767)); +-CREATE INDEX t1d ON t1 (d(767)); +-CREATE INDEX t1e ON t1 (e(767)); + UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, + k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; + CREATE INDEX t1f ON t1 (f(767)); +@@ -407,30 +276,6 @@ + COMMIT; + CREATE INDEX t1g ON t1 (g(767)); + UPDATE t1 SET g=@e; +-CREATE INDEX t1h ON t1 (h(767)); +-UPDATE t1 SET h=@e; +-CREATE INDEX t1i ON t1 (i(767)); +-UPDATE t1 SET i=@e; +-CREATE INDEX t1j ON t1 (j(767)); +-UPDATE t1 SET j=@e; +-CREATE INDEX t1k ON t1 (k(767)); +-UPDATE t1 SET k=@e; +-CREATE INDEX t1l ON t1 (l(767)); +-UPDATE t1 SET l=@e; +-CREATE INDEX t1m ON t1 (m(767)); +-UPDATE t1 SET m=@e; +-CREATE INDEX t1n ON t1 (n(767)); +-UPDATE t1 SET n=@e; +-CREATE INDEX t1o ON t1 (o(767)); +-UPDATE t1 SET o=@e; +-CREATE INDEX t1p ON t1 (p(767)); +-UPDATE t1 SET p=@e; +-CREATE INDEX t1q ON t1 (q(767)); +-UPDATE t1 SET q=@e; +-CREATE INDEX t1r ON t1 (r(767)); +-UPDATE t1 SET r=@e; +-CREATE INDEX t1s ON t1 (s(767)); +-UPDATE t1 SET s=@e; + CREATE INDEX t1t ON t1 (t(767)); + BEGIN; + UPDATE t1 SET t=@e; +@@ -463,24 +308,8 @@ + `t` blob DEFAULT NULL, + `u` blob DEFAULT NULL, + KEY `t1a` (`a`(767)), +- KEY `t1b` (`b`(767)), +- KEY `t1c` (`c`(767)), +- KEY `t1d` (`d`(767)), +- KEY `t1e` (`e`(767)), + KEY `t1f` (`f`(767)), + KEY `t1g` (`g`(767)), +- KEY `t1h` 
(`h`(767)), +- KEY `t1i` (`i`(767)), +- KEY `t1j` (`j`(767)), +- KEY `t1k` (`k`(767)), +- KEY `t1l` (`l`(767)), +- KEY `t1m` (`m`(767)), +- KEY `t1n` (`n`(767)), +- KEY `t1o` (`o`(767)), +- KEY `t1p` (`p`(767)), +- KEY `t1q` (`q`(767)), +- KEY `t1r` (`r`(767)), +- KEY `t1s` (`s`(767)), + KEY `t1t` (`t`(767)), + KEY `t1u` (`u`(767)), + KEY `t1ut` (`u`(767),`t`(767)), +@@ -568,14 +397,14 @@ + PRIMARY KEY (c(767),d(767))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; + Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++Warning 139 Row size too large (> 4030). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + DROP TABLE t1; + CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; + Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++Warning 139 Row size too large (> 4030). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. + DROP TABLE t1; + CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, +@@ -585,7 +414,7 @@ + CREATE TABLE t1(c text, PRIMARY KEY (c(440))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; + Warnings: +-Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. ++Warning 139 Row size too large (> 4030). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+ DROP TABLE t1; + CREATE TABLE t1(c text, PRIMARY KEY (c(438))) + ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; diff --git a/mysql-test/suite/innodb_zip/r/16k.result b/mysql-test/suite/innodb_zip/r/page_size.result similarity index 77% rename from mysql-test/suite/innodb_zip/r/16k.result rename to mysql-test/suite/innodb_zip/r/page_size.result index e7def759e87..85ff027781e 100644 --- a/mysql-test/suite/innodb_zip/r/16k.result +++ b/mysql-test/suite/innodb_zip/r/page_size.result @@ -4,7 +4,6 @@ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size'; variable_value 16384 -# Test 2) The number of buffer pool pages is dependent upon the page size. # Test 3) Query some information_shema tables that are dependent upon # the page size. SELECT t.name table_name, t.n_cols, t.flag table_flags, @@ -47,8 +46,6 @@ test/t3 Single DEFAULT 8192 Compressed MYSQLD_DATADIR/test/t3.ibd test/t4 Single DEFAULT DEFAULT Dynamic MYSQLD_DATADIR/test/t4.ibd DROP TABLE t1, t2, t3, t4; # Test 4) The maximum row size is dependent upon the page size. -# Redundant: 8123, Compact: 8126. -# Compressed: 8126, Dynamic: 8126. # Each row format has its own amount of overhead that # varies depending on number of fields and other overhead. SET SESSION innodb_strict_mode = ON; @@ -174,9 +171,7 @@ f varchar(5) character set utf8, PRIMARY KEY (a), KEY (b,c,d,e,f)) ENGINE=innodb; ERROR 42000: Specified key was too long; max key length is 3072 bytes -# Test 5) Make sure that KEY_BLOCK_SIZE=16, 8, 4, 2 & 1 -# are all accepted. 
-SET SESSION innodb_strict_mode = ON; +# Test 5) KEY_BLOCK_SIZE validation CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; SHOW WARNINGS; Level Code Message @@ -264,22 +259,21 @@ FROM information_schema.tables WHERE table_name = 't1'; table_name row_format create_options t1 Compressed row_format=COMPRESSED DROP TABLE t1; -# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 -# are rejected when innodb_file_per_table=OFF +# Test 6) KEY_BLOCK_SIZE with innodb_file_per_table=OFF SET SESSION innodb_strict_mode = ON; SET GLOBAL innodb_file_per_table = OFF; SHOW VARIABLES LIKE 'innodb_file_per_table'; Variable_name Value innodb_file_per_table OFF CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; -Got one of the listed errors +ERROR HY000: Can't create table `test`.`t4` (errno: 140 "Wrong create options") SHOW WARNINGS; Level Code Message Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; -Got one of the listed errors +ERROR HY000: Can't create table `test`.`t5` (errno: 140 "Wrong create options") SHOW WARNINGS; Level Code Message Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. @@ -493,124 +487,7 @@ t1 CREATE TABLE `t1` ( KEY `t1st` (`s`(767),`t`(767)) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DROP TABLE t1; -# Bug #12429576 - Test an assertion failure on purge. 
-CREATE TABLE t1_purge ( -A int, -B blob, C blob, D blob, E blob, -F blob, G blob, H blob, -PRIMARY KEY (B(767), C(767), D(767), E(767), A), -INDEX (A) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -INSERT INTO t1_purge VALUES (1, -REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), -REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); -CREATE TABLE t2_purge ( -A int PRIMARY KEY, -B blob, C blob, D blob, E blob, -F blob, G blob, H blob, I blob, -J blob, K blob, L blob, -INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -INSERT INTO t2_purge VALUES (1, -REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), -REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), -REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); -CREATE TABLE t3_purge ( -A int, -B varchar(800), C varchar(800), D varchar(800), E varchar(800), -F varchar(800), G varchar(800), H varchar(800), -PRIMARY KEY (B(767), C(767), D(767), E(767), A), -INDEX (A) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -INSERT INTO t3_purge SELECT * FROM t1_purge; -CREATE TABLE t4_purge ( -A int PRIMARY KEY, -B varchar(800), C varchar(800), D varchar(800), E varchar(800), -F varchar(800), G varchar(800), H varchar(800), I varchar(800), -J varchar(800), K varchar(800), L varchar(800), -INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -INSERT INTO t4_purge SELECT * FROM t2_purge; -DELETE FROM t1_purge; -DELETE FROM t2_purge; -DELETE FROM t3_purge; -DELETE FROM t4_purge; -SET @r=REPEAT('a',500); -CREATE TABLE t12637786(a int, -v1 varchar(500), v2 varchar(500), v3 varchar(500), -v4 varchar(500), v5 varchar(500), v6 varchar(500), -v7 varchar(500), v8 varchar(500), v9 varchar(500), -v10 varchar(500), v11 varchar(500), v12 varchar(500), -v13 varchar(500), v14 varchar(500), v15 varchar(500), -v16 varchar(500), v17 varchar(500), v18 varchar(500) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -CREATE INDEX idx1 ON t12637786(a,v1); -INSERT INTO t12637786 
VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -UPDATE t12637786 SET a=1000; -DELETE FROM t12637786; -# Bug#12963823 - Test that the purge thread does not crash when -CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, -i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) -ENGINE=innodb ROW_FORMAT=dynamic; -SET @r = REPEAT('a', 767); -INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); -CREATE INDEX ndx_a ON t12963823 (a(500)); -CREATE INDEX ndx_b ON t12963823 (b(500)); -CREATE INDEX ndx_c ON t12963823 (c(500)); -CREATE INDEX ndx_d ON t12963823 (d(500)); -CREATE INDEX ndx_e ON t12963823 (e(500)); -CREATE INDEX ndx_f ON t12963823 (f(500)); -CREATE INDEX ndx_k ON t12963823 (k(500)); -CREATE INDEX ndx_l ON t12963823 (l(500)); -SET @r = REPEAT('b', 500); -UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; -UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; -UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; -UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; -ALTER TABLE t12963823 DROP INDEX ndx_a; -ALTER TABLE t12963823 DROP INDEX ndx_b; -CREATE INDEX ndx_g ON t12963823 (g(500)); -CREATE INDEX ndx_h ON t12963823 (h(500)); -CREATE INDEX ndx_i ON t12963823 (i(500)); -CREATE INDEX ndx_j ON t12963823 (j(500)); -CREATE INDEX ndx_m ON t12963823 (m(500)); -CREATE INDEX ndx_n ON t12963823 (n(500)); -CREATE INDEX ndx_o ON t12963823 (o(500)); -CREATE INDEX ndx_p ON t12963823 (p(500)); -SHOW CREATE TABLE t12963823; -Table Create Table -t12963823 CREATE TABLE `t12963823` ( - `a` blob DEFAULT NULL, - `b` blob DEFAULT NULL, - `c` blob DEFAULT NULL, - `d` blob DEFAULT NULL, - `e` blob DEFAULT NULL, - `f` blob DEFAULT NULL, - `g` blob DEFAULT NULL, - `h` blob DEFAULT NULL, - `i` blob DEFAULT NULL, - `j` blob DEFAULT NULL, - `k` blob DEFAULT NULL, - `l` blob DEFAULT NULL, - `m` blob DEFAULT NULL, - `n` blob DEFAULT NULL, - `o` blob DEFAULT NULL, - `p` blob DEFAULT NULL, - KEY `ndx_c` (`c`(500)), - KEY `ndx_d` (`d`(500)), - KEY `ndx_e` 
(`e`(500)), - KEY `ndx_f` (`f`(500)), - KEY `ndx_k` (`k`(500)), - KEY `ndx_l` (`l`(500)), - KEY `ndx_g` (`g`(500)), - KEY `ndx_h` (`h`(500)), - KEY `ndx_i` (`i`(500)), - KEY `ndx_j` (`j`(500)), - KEY `ndx_m` (`m`(500)), - KEY `ndx_n` (`n`(500)), - KEY `ndx_o` (`o`(500)), - KEY `ndx_p` (`p`(500)) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC # Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE -SET SESSION innodb_strict_mode = ON; CREATE TABLE bug12547647( a int NOT NULL, b blob NOT NULL, c text, PRIMARY KEY (b(10), a), INDEX (c(767)), INDEX(b(767)) @@ -623,7 +500,69 @@ SHOW WARNINGS; Level Code Message ROLLBACK; DROP TABLE bug12547647; -SET SESSION innodb_strict_mode = off; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( +pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), +pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), +pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), +pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), +sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), +sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), +sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), +sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, 
+@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; +CREATE TABLE t1( +pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), +pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), +pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), +pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), +sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), +sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), +sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), +sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; 
+DROP TABLE t1; CREATE TABLE t1( c text NOT NULL, d text NOT NULL, PRIMARY KEY (c(767),d(767))) @@ -642,7 +581,7 @@ CREATE TABLE t1( c text NOT NULL, d text NOT NULL, PRIMARY KEY (c(767),d(767))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; +DROP TABLE t1; CREATE TABLE t1(c text, PRIMARY KEY (c(440))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; Warnings: @@ -652,50 +591,3 @@ CREATE TABLE t1(c text, PRIMARY KEY (c(438))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); DROP TABLE t1; -# -# Bug#56862 Execution of a query that uses index merge returns a wrong result -# -CREATE TABLE t1 ( -pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, -a int, -b int, -INDEX idx(a)) -ENGINE=INNODB; -INSERT INTO t1(a,b) VALUES -(11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), -(3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), -(6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), -(13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); -INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; -INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; -INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1 VALUES (1000000, 0, 0); -set @optimizer_switch_saved=@@optimizer_switch; -SET SESSION optimizer_switch='derived_merge=off'; -SET SESSION sort_buffer_size = 1024*36; -EXPLAIN -SELECT COUNT(*) FROM -(SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) -WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; -id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 1537 -2 DERIVED t1 index_merge 
PRIMARY,idx idx,PRIMARY 5,4 NULL 1537 Using sort_union(idx,PRIMARY); Using where -SELECT COUNT(*) FROM -(SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) -WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; -COUNT(*) -1537 -set @@optimizer_switch=@optimizer_switch_saved; -SET SESSION sort_buffer_size = DEFAULT; -DROP TABLE t1; -DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge; -DROP TABLE t12637786; -DROP TABLE t12963823; diff --git a/mysql-test/suite/innodb_zip/t/4k-master.opt b/mysql-test/suite/innodb_zip/t/4k-master.opt deleted file mode 100644 index 82f574a8039..00000000000 --- a/mysql-test/suite/innodb_zip/t/4k-master.opt +++ /dev/null @@ -1,3 +0,0 @@ ---loose-innodb-sys-indexes ---loose-innodb-sys-tablespaces ---loose-innodb-sys-datafiles diff --git a/mysql-test/suite/innodb_zip/t/4k.test b/mysql-test/suite/innodb_zip/t/4k.test deleted file mode 100644 index 3b8c1771a96..00000000000 --- a/mysql-test/suite/innodb_zip/t/4k.test +++ /dev/null @@ -1,433 +0,0 @@ -# Tests for setting innodb-page-size=4k - ---source include/have_innodb.inc ---source include/have_innodb_4k.inc -SET default_storage_engine=InnoDB; - ---disable_query_log -let $MYSQLD_DATADIR = `select @@datadir`; -let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; - -call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); ---enable_query_log - ---echo # Test 1) Show the page size from Information Schema ---disable_warnings -SELECT variable_value FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_page_size'; ---enable_warnings - ---echo # Test 2) The number of buffer pool pages is dependent upon the page size. ---disable_warnings ---replace_result 2048 {checked_valid} -SELECT variable_value FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; ---enable_warnings - ---echo # Test 3) Query some information_shema tables that are dependent upon ---echo # the page size. 
-# Show the metadata for tables in schema 'mysql'. -# Pulled from innodb-system-table-view.test -# The IDs of mysql.innodb_table_stats and mysql.innodb_index_stats are -# unpredictable. They depend on whether mtr has created the database for -# this test from scratch or is using a previously created database where -# those tables have been dropped and recreated. Since we cannot force mtr -# to use a freshly created database for this test we do not return the -# table or index IDs. We can return the space IS of mysql schema tables -# since they are created consistently during bootstrap. -SELECT t.name table_name, t.n_cols, t.flag table_flags, - i.name index_name, i.page_no root_page, i.type, - i.n_fields, i.merge_threshold - FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, - INFORMATION_SCHEMA.INNODB_SYS_INDEXES i - WHERE t.table_id = i.table_id - AND t.name LIKE 'mysql%' - ORDER BY t.name, i.index_id; - -CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; -CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; -CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; -CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; - -# Show the metadata for tables in schema 'test'. -# Do not return the space ID since this tablespace may have existed before -# this test runs. The root page number of each index should be consistent -# within a file-per-table tablespace. -SELECT t.name table_name, t.n_cols, t.flag table_flags, - i.name index_name, i.page_no root_page, i.type, - i.n_fields, i.merge_threshold - FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, - INFORMATION_SCHEMA.INNODB_SYS_INDEXES i - WHERE t.table_id = i.table_id - AND t.name LIKE 'test%' - ORDER BY t.name, i.name; ---source suite/innodb/include/show_i_s_tablespaces.inc -DROP TABLE t1, t2, t3, t4; - ---echo # Test 4) The maximum row size is dependent upon the page size. ---echo # Redundant: 1979, Compact: 1982. ---echo # Compressed: 1982, Dynamic: 1982. 
---echo # Each row format has its own amount of overhead that ---echo # varies depending on number of fields and other overhead. - -SET SESSION innodb_strict_mode = ON; - -# Redundant table; 1927 bytes with 10 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) -) ROW_FORMAT=redundant; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) -) ROW_FORMAT=redundant; - -# Compact table; 1955 bytes with 10 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) -) ROW_FORMAT=compact; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) -) ROW_FORMAT=compact; - -# Compressed table; 1878 bytes with 10 CHAR fields -# Bug#13391353 Limit is 1876 on 32-Linux only -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) -) ROW_FORMAT=compressed; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) -) ROW_FORMAT=compressed; - -# Dynamic table; 1955 bytes with 10 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 
char(200), c10 char(155) -) ROW_FORMAT=dynamic; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) -) ROW_FORMAT=dynamic; - -# -# Test the maximum key length -# Moved from innodb-index.test since each page size has its own max key length. -# Max Key Length is 768 for 4k pages. -# -# InnoDB assumes 3 bytes for each UTF8 character. -# -CREATE TABLE t1 (a varchar(64) character set utf8, - b varchar(64) character set utf8, - c varchar(64) character set utf8, - d varchar(64) character set utf8, - PRIMARY KEY (a,b,c,d)) - ENGINE=innodb; -DROP TABLE t1; ---error ER_TOO_LONG_KEY -CREATE TABLE t1 (a varchar(64) character set utf8, - b varchar(64) character set utf8, - c varchar(64) character set utf8, - d varchar(65) character set utf8, - PRIMARY KEY (a,b,c,d)) - ENGINE=innodb; -CREATE TABLE t1 (a varchar(64) character set utf8, - b varchar(64) character set utf8, - c varchar(64) character set utf8, - d varchar(64) character set utf8, - e varchar(64) character set utf8, - PRIMARY KEY (a), KEY (b,c,d,e)) - ENGINE=innodb; -DROP TABLE t1; ---error ER_TOO_LONG_KEY -CREATE TABLE t1 (a varchar(64) character set utf8, - b varchar(64) character set utf8, - c varchar(64) character set utf8, - d varchar(64) character set utf8, - e varchar(65) character set utf8, - PRIMARY KEY (a), KEY (b,c,d,e)) - ENGINE=innodb; - ---echo # Test 5) Make sure that KEY_BLOCK_SIZE=4, 2 & 1 are all ---echo # accepted and that KEY_BLOCK_SIZE=16 & 8 are rejected ---echo # in strict mode and converted to 4 in non-strict mode. 
- -SET SESSION innodb_strict_mode = ON; - ---error ER_CANT_CREATE_TABLE -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -SHOW WARNINGS; - ---error ER_CANT_CREATE_TABLE -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -SET SESSION innodb_strict_mode = OFF; - -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -SELECT table_name, row_format, 
create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - - ---echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 ---echo # are both rejected when innodb_file_per_table=OFF -# Moved from innodb-zip.test -SET SESSION innodb_strict_mode = ON; -SET GLOBAL innodb_file_per_table = OFF; -SHOW VARIABLES LIKE 'innodb_file_per_table'; ---error ER_CANT_CREATE_TABLE -CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; -SHOW WARNINGS; ---error ER_CANT_CREATE_TABLE -CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; -SHOW WARNINGS; -SET GLOBAL innodb_file_per_table = ON; - ---echo # Test 7) Not included here; 16k only - - ---echo # Test 8) Test creating a table that could lead to undo log overflow. -CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, - h blob,i blob,j blob,k blob,l blob,m blob,n blob, - o blob,p blob,q blob,r blob,s blob,t blob,u blob) - ENGINE=InnoDB ROW_FORMAT=dynamic; -SET @a = repeat('a', 767); -SET @b = repeat('b', 767); -SET @c = repeat('c', 767); -SET @d = repeat('d', 767); -SET @e = repeat('e', 767); - -# With no indexes defined, we can update all columns to max key part length. -INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); -UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, - k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; - -# With one index defined, we can still update all fields. -CREATE INDEX t1a ON t1 (a(767)); -UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, - k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; - -# Add one more index and the UNDO record becomes too big to update all columns. -# But a single transaction can update the columns in separate statements. -# because the UNDO records will be smaller. 
-CREATE INDEX t1b ON t1 (b(767)); -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, - k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -ROLLBACK; -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; -UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, - n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -COMMIT; - -# Another index can still be added and a single field can still be updated -CREATE INDEX t1c ON t1 (c(767)); -UPDATE t1 SET c=@e; - -# Add one more index and we cannot update a column to its defined index length. -# This is a problem. It means that the DDL is allowed to create a table -# that CANNOT be updated. See bug#12953735. -CREATE INDEX t1d ON t1 (d(767)); -BEGIN; -UPDATE t1 SET d=@e; -ROLLBACK; - ---replace_regex /> [0-9]*/> max_row_size/ -CREATE INDEX t1e ON t1 (e(767)); - -SHOW CREATE TABLE t1; -DROP TABLE t1; - -# -# Bug #13336585 - INNODB: CHANGE BUFFERING WITH 4K PAGES CAN ASSERT -# IF SECONDARY KEY IS NEAR MAX -# If the secondary index tuple is close to half the page size, -# ibuf_insert_low() could return DB_TOO_BIG_RECORD, which is not expected -# in ibuf_insert(). In order to insure this does not happen, WL5756 -# imposes a maximum key length of 768 for 4k pages and 1536 for 8k pages. -# The existing max key Size for 16k pages is 3072. -# - -#-- disable_query_log -# The flag innodb_change_buffering_debug is only available in debug builds. -# It instructs InnoDB to try to evict pages from the buffer pool when -# change buffering is possible, so that the change buffer will be used -# whenever possible. -# This flag is not used currently since it exposes valgrind error in ibuf -# code with the following SQL -#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; -#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET GLOBAL innodb_change_buffering_debug = 1; -#-- enable_query_log - -# make sure the largest possible key entry can be added to the insert buffer. 
-# Make enough records so that the root page is not a leaf page. -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1( - pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), - pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), - pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), - pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), - sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), - sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), - sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), - sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), - PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, - pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), - KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, - sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) - ROW_FORMAT=Redundant ENGINE=InnoDB; -SET @r = repeat('a', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; - -# Compressed tables do not compress parent pages. So the whole uncompressed -# secondary tuple including the primary key must be able to fit in half the -# compressed page size. This record length is enforced at index creation. 
-# So the only way to get an ibuf tuple too big is to make the KEY_BLOCK_SIZE -# the same as the page size. -CREATE TABLE t1( - pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), - pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), - pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), - pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), - sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), - sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), - sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), - sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), - PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, - pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), - KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, - sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) - ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; -SET @r = repeat('a', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 48); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; - -#-- disable_query_log -#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET GLOBAL innodb_change_buffering_debug = 0; -#-- enable_query_log - -# The following should fail in non-strict mode too. 
-# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) -SET SESSION innodb_strict_mode = off; ---replace_regex /> [0-9]*/> max_row_size/ -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -DROP TABLE t1; -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; -drop table t1; -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; ---replace_regex /> [0-9]*/> max_row_size/ -CREATE TABLE t1(c text, PRIMARY KEY (c(440))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -DROP TABLE t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(438))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); -DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/t/8k-master.opt b/mysql-test/suite/innodb_zip/t/8k-master.opt deleted file mode 100644 index 82f574a8039..00000000000 --- a/mysql-test/suite/innodb_zip/t/8k-master.opt +++ /dev/null @@ -1,3 +0,0 @@ ---loose-innodb-sys-indexes ---loose-innodb-sys-tablespaces ---loose-innodb-sys-datafiles diff --git a/mysql-test/suite/innodb_zip/t/8k.test b/mysql-test/suite/innodb_zip/t/8k.test deleted file mode 100644 index e10d48cb284..00000000000 --- a/mysql-test/suite/innodb_zip/t/8k.test +++ /dev/null @@ -1,461 +0,0 @@ -# Tests for setting innodb-page-size=8k - ---source include/have_innodb.inc ---source include/have_innodb_8k.inc -SET default_storage_engine=InnoDB; - ---disable_query_log -let $MYSQLD_DATADIR = `select @@datadir`; -let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; - -call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); ---enable_query_log - ---echo # Test 1) Show the page size from 
Information Schema ---disable_warnings -SELECT variable_value FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_page_size'; ---enable_warnings - ---echo # Test 2) The number of buffer pool pages is dependent upon the page size. ---disable_warnings ---replace_result 1023 {checked_valid} 1024 {checked_valid} -SELECT variable_value FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; ---enable_warnings - ---echo # Test 3) Query some information_shema tables that are dependent upon ---echo # the page size. -# Show the metadata for tables in schema 'mysql'. -# Pulled from innodb-system-table-view.test -# The IDs of mysql.innodb_table_stats and mysql.innodb_index_stats are -# unpredictable. They depend on whether mtr has created the database for -# this test from scratch or is using a previously created database where -# those tables have been dropped and recreated. Since we cannot force mtr -# to use a freshly created database for this test we do not return the -# table or index IDs. We can return the space IS of mysql schema tables -# since they are created consistently during bootstrap. -SELECT t.name table_name, t.n_cols, t.flag table_flags, - i.name index_name, i.page_no root_page, i.type, - i.n_fields, i.merge_threshold - FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, - INFORMATION_SCHEMA.INNODB_SYS_INDEXES i - WHERE t.table_id = i.table_id - AND t.name LIKE 'mysql%' - ORDER BY t.name, i.index_id; - -CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; -CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; -CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; -CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; - -# Show the metadata for tables in schema 'test'. -# Do not return the space ID since this tablespace may have existed before -# this test runs. 
The root page number of each index should be consistent -# within a file-per-table tablespace. -SELECT t.name table_name, t.n_cols, t.flag table_flags, - i.name index_name, i.page_no root_page, i.type, - i.n_fields, i.merge_threshold - FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, - INFORMATION_SCHEMA.INNODB_SYS_INDEXES i - WHERE t.table_id = i.table_id - AND t.name LIKE 'test%' - ORDER BY t.name, i.name; ---source suite/innodb/include/show_i_s_tablespaces.inc -DROP TABLE t1, t2, t3, t4; - ---echo # Test 4) The maximum row size is dependent upon the page size. ---echo # Redundant: 4027, Compact: 4030. ---echo # Compressed: 4030, Dynamic: 4030. ---echo # Each row format has its own amount of overhead that ---echo # varies depending on number of fields and other overhead. - -SET SESSION innodb_strict_mode = ON; - -# Redundant table; 3955 bytes with 20 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) -) ROW_FORMAT=redundant; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) -) ROW_FORMAT=redundant; - -# Compact table; 4002 bytes with 20 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), 
c19 char(200), c20 char(202) -) ROW_FORMAT=compact; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) -) ROW_FORMAT=compact; - -# Compressed table; 3905 bytes with 20 CHAR fields -# Bug#13391353 Limit is 3903 on 32-Linux only -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) -) ROW_FORMAT=compressed; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) -) ROW_FORMAT=compressed; - -# Dynamic table; 4002 bytes with 20 CHAR fields -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) -) ROW_FORMAT=dynamic; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ ---error ER_TOO_BIG_ROWSIZE -CREATE TABLE t1 ( -c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), -c06 char(200), c07 char(200), c08 char(200), c09 
char(200), c10 char(200), -c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), -c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) -) ROW_FORMAT=dynamic; - -# -# Test the maximum key length -# Moved from innodb-index.test since each page size has its own max key length. -# Max Key Length is 1536 for 8k pages. -# -# InnoDB assumes 3 bytes for each UTF8 character. -# -CREATE TABLE t1 (a varchar(128) character set utf8, - b varchar(128) character set utf8, - c varchar(128) character set utf8, - d varchar(128) character set utf8, - PRIMARY KEY (a,b,c,d)) - ENGINE=innodb; -DROP TABLE t1; ---error ER_TOO_LONG_KEY -CREATE TABLE t1 (a varchar(128) character set utf8, - b varchar(128) character set utf8, - c varchar(128) character set utf8, - d varchar(129) character set utf8, - PRIMARY KEY (a,b,c,d)) - ENGINE=innodb; -CREATE TABLE t1 (a varchar(128) character set utf8, - b varchar(128) character set utf8, - c varchar(128) character set utf8, - d varchar(128) character set utf8, - e varchar(128) character set utf8, - PRIMARY KEY (a), KEY (b,c,d,e)) - ENGINE=innodb; -DROP TABLE t1; ---error ER_TOO_LONG_KEY -CREATE TABLE t1 (a varchar(128) character set utf8, - b varchar(128) character set utf8, - c varchar(128) character set utf8, - d varchar(128) character set utf8, - e varchar(129) character set utf8, - PRIMARY KEY (a), KEY (b,c,d,e)) - ENGINE=innodb; - ---echo # Test 5) Make sure that KEY_BLOCK_SIZE=8, 4, 2 & 1 are all ---echo # accepted and that KEY_BLOCK_SIZE=16 is rejected in ---echo # strict mode and converted to 8 in non-strict mode. 
- -SET SESSION innodb_strict_mode = ON; - ---error ER_CANT_CREATE_TABLE -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -SHOW WARNINGS; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -SET SESSION innodb_strict_mode = OFF; - -CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - -CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=2; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=1; -SHOW WARNINGS; -SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; - -ALTER TABLE t1 KEY_BLOCK_SIZE=0; -SHOW WARNINGS; 
-SELECT table_name, row_format, create_options - FROM information_schema.tables WHERE table_name = 't1'; -DROP TABLE t1; - - ---echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 ---echo # are rejected when innodb_file_per_table=OFF -# Moved from innodb-zip.test -SET SESSION innodb_strict_mode = ON; -SET GLOBAL innodb_file_per_table = OFF; -SHOW VARIABLES LIKE 'innodb_file_per_table'; ---error ER_CANT_CREATE_TABLE -CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; -SHOW WARNINGS; ---error ER_CANT_CREATE_TABLE -CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; -SHOW WARNINGS; -SET GLOBAL innodb_file_per_table = ON; - ---echo # Test 7) Not included here; 16k only - - ---echo # Test 8) Test creating a table that could lead to undo log overflow. -CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, - h blob,i blob,j blob,k blob,l blob,m blob,n blob, - o blob,p blob,q blob,r blob,s blob,t blob,u blob) - ENGINE=InnoDB ROW_FORMAT=dynamic; -SET @a = repeat('a', 767); -SET @b = repeat('b', 767); -SET @c = repeat('c', 767); -SET @d = repeat('d', 767); -SET @e = repeat('e', 767); - -# With no indexes defined, we can update all columns to max key part length. -INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); -UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, - k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; - -# With this many indexes defined, we can still update all fields. -CREATE INDEX t1a ON t1 (a(767)); -CREATE INDEX t1b ON t1 (b(767)); -UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, - k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; - -# Add one more index and the UNDO record becomes too big to update all columns. -# But a single transaction can update the columns in separate statements. -# because the UNDO records will be smaller. 
-CREATE INDEX t1c ON t1 (c(767)); -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, - k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -ROLLBACK; -BEGIN; -UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; -UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, - n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; -COMMIT; - -# More indexes can still be added and a single field can still be updated -CREATE INDEX t1d ON t1 (d(767)); -UPDATE t1 SET d=@e; -CREATE INDEX t1e ON t1 (e(767)); -UPDATE t1 SET e=@e; -CREATE INDEX t1f ON t1 (f(767)); -UPDATE t1 SET f=@e; -CREATE INDEX t1g ON t1 (g(767)); -UPDATE t1 SET g=@e; -CREATE INDEX t1h ON t1 (h(767)); -UPDATE t1 SET h=@e; -CREATE INDEX t1i ON t1 (i(767)); -UPDATE t1 SET i=@e; - ---replace_regex /> [0-9]*/> max_row_size/ -CREATE INDEX t1k ON t1 (j(767)); - -# But it does allow a 500 byte index. And with this, we cannot -# update the record. This is a problem. It means that the DDL is -# allowed to create a table and a record that CANNOT be updated. -# See bug#12953735 ---replace_regex /> [0-9]*/> max_row_size/ -CREATE INDEX t1j ON t1 (j(500)); -BEGIN; -UPDATE t1 SET j=@e; -ROLLBACK; -SHOW CREATE TABLE t1; -DROP TABLE t1; - -# -# Bug #13336585 - INNODB: CHANGE BUFFERING WITH 4K PAGES CAN ASSERT -# IF SECONDARY KEY IS NEAR MAX -# If the secondary index tuple is close to half the page size, -# ibuf_insert_low() could return DB_TOO_BIG_RECORD, which is not expected -# in ibuf_insert(). In order to insure this does not happen, WL5756 -# imposes a maximum key length of 768 for 4k pages and 1536 for 8k pages. -# The existing max key Size for 16k pages is 3072. -# - -#-- disable_query_log -# The flag innodb_change_buffering_debug is only available in debug builds. -# It instructs InnoDB to try to evict pages from the buffer pool when -# change buffering is possible, so that the change buffer will be used -# whenever possible. 
-#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; -#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET GLOBAL innodb_change_buffering_debug = 1; -#-- enable_query_log - -# make sure the largest possible key entry can be added to the insert buffer. -# Make enough records so that the root page is not a leaf page. -SET SESSION innodb_strict_mode = OFF; -CREATE TABLE t1( - pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), - pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), - pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), - pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), - sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), - sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), - sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), - sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), - PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, - pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), - KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, - sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) - ROW_FORMAT=Redundant ENGINE=InnoDB; -SET @r = repeat('a', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - 
@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; - -# Compressed tables do not compress parent pages. So the whole uncompressed -# secondary tuple including the primary key must be able to fit in half the -# compressed page size. This record length is enforced at index creation. -# So the only way to get an ibuf tuple too big is to make the KEY_BLOCK_SIZE -# the same as the page size. -CREATE TABLE t1( - pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), - pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), - pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), - pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), - sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), - sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), - sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), - sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), - PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, - pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), - KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, - sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) - ROW_FORMAT=Compressed KEY_BLOCK_SIZE=8 ENGINE=InnoDB; -SET @r = repeat('a', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('b', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('c', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('d', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -SET @r = repeat('e', 96); -INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, - 
@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -DELETE from t1; -DROP TABLE t1; - -#-- disable_query_log -#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE -#SET GLOBAL innodb_change_buffering_debug = 0; -#-- enable_query_log - -# The following should fail in non-strict mode too. -# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) -SET SESSION innodb_strict_mode = off; ---replace_regex /> [0-9]*/> max_row_size/ -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -DROP TABLE t1; ---replace_regex /> [0-9]*/> max_row_size/ -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; -DROP TABLE t1; -CREATE TABLE t1( - c text NOT NULL, d text NOT NULL, - PRIMARY KEY (c(767),d(767))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; ---replace_regex /> [0-9]*/> max_row_size/ -CREATE TABLE t1(c text, PRIMARY KEY (c(440))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -DROP TABLE t1; -CREATE TABLE t1(c text, PRIMARY KEY (c(438))) -ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; -INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); -DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/t/16k-master.opt b/mysql-test/suite/innodb_zip/t/page_size.opt similarity index 100% rename from mysql-test/suite/innodb_zip/t/16k-master.opt rename to mysql-test/suite/innodb_zip/t/page_size.opt diff --git a/mysql-test/suite/innodb_zip/t/16k.test b/mysql-test/suite/innodb_zip/t/page_size.test similarity index 57% rename from mysql-test/suite/innodb_zip/t/16k.test rename to mysql-test/suite/innodb_zip/t/page_size.test index 6a829e6bbb6..575259a7855 100644 --- a/mysql-test/suite/innodb_zip/t/16k.test +++ b/mysql-test/suite/innodb_zip/t/page_size.test @@ -1,7 +1,4 @@ -# Tests for setting innodb-page-size=16k; 
default value ---source include/big_test.inc ---source include/have_innodb.inc ---source include/have_innodb_16k.inc +--source include/innodb_page_size_small.inc SET default_storage_engine=InnoDB; --disable_query_log @@ -9,24 +6,11 @@ let $MYSQLD_DATADIR = `select @@datadir`; let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); -# These values can change during the test --enable_query_log --echo # Test 1) Show the page size from Information Schema ---disable_warnings SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size'; ---enable_warnings - ---echo # Test 2) The number of buffer pool pages is dependent upon the page size. -# -# buffer pool pages is dependent upon buffer pool size and what other -# tests are run concurrently -#--disable_warnings -#--replace_result 1535 {checked_valid} 1536 {checked_valid} -#SELECT variable_value FROM information_schema.global_status -# WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; -#--enable_warnings --echo # Test 3) Query some information_shema tables that are dependent upon --echo # the page size. @@ -69,13 +53,212 @@ SELECT t.name table_name, t.n_cols, t.flag table_flags, DROP TABLE t1, t2, t3, t4; --echo # Test 4) The maximum row size is dependent upon the page size. ---echo # Redundant: 8123, Compact: 8126. ---echo # Compressed: 8126, Dynamic: 8126. --echo # Each row format has its own amount of overhead that --echo # varies depending on number of fields and other overhead. 
SET SESSION innodb_strict_mode = ON; +if ($INNODB_PAGE_SIZE == 4096) { +# Redundant table; 1927 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) +) ROW_FORMAT=redundant; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) +) ROW_FORMAT=redundant; + +# Compact table; 1955 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=compact; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=compact; + +# Compressed table; 1878 bytes with 10 CHAR fields +# Bug#13391353 Limit is 1876 on 32-Linux only +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) +) ROW_FORMAT=compressed; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) +) ROW_FORMAT=compressed; + +# Dynamic table; 1955 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +--replace_regex /> [0-9]*/> 
max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=dynamic; + +# +# Test the maximum key length +# Moved from innodb-index.test since each page size has its own max key length. +# Max Key Length is 1173 for 4k pages. +# +# InnoDB assumes 3 bytes for each UTF8 character. +# +CREATE TABLE t1 (a varchar(98) character set utf8, + b varchar(98) character set utf8, + c varchar(98) character set utf8, + d varchar(97) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(98) character set utf8, + b varchar(98) character set utf8, + c varchar(98) character set utf8, + d varchar(98) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +CREATE TABLE t1 (a varchar(98) character set utf8, + b varchar(98) character set utf8, + c varchar(98) character set utf8, + d varchar(98) character set utf8, + e varchar(97) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(98) character set utf8, + b varchar(98) character set utf8, + c varchar(98) character set utf8, + d varchar(98) character set utf8, + e varchar(98) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +} +if ($INNODB_PAGE_SIZE == 8192) { +# Redundant table; 3955 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) +) ROW_FORMAT=redundant; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 
char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) +) ROW_FORMAT=redundant; + +# Compact table; 4002 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) +) ROW_FORMAT=compact; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=compact; + +# Compressed table; 3905 bytes with 20 CHAR fields +# Bug#13391353 Limit is 3903 on 32-Linux only +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) +) ROW_FORMAT=compressed; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) +) 
ROW_FORMAT=compressed; + +# Dynamic table; 4002 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=dynamic; + +# +# Test the maximum key length +# Moved from innodb-index.test since each page size has its own max key length. +# Max Key Length is 1536 for 8k pages. +# +# InnoDB assumes 3 bytes for each UTF8 character. 
+# +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(129) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + e varchar(128) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + e varchar(129) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +} +if ($INNODB_PAGE_SIZE == 16384) { # Redundant table; 8011 bytes with 40 char fields CREATE TABLE t1 ( c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), @@ -214,12 +397,32 @@ CREATE TABLE t1 (a varchar(255) character set utf8, f varchar(5) character set utf8, PRIMARY KEY (a), KEY (b,c,d,e,f)) ENGINE=innodb; +} ---echo # Test 5) Make sure that KEY_BLOCK_SIZE=16, 8, 4, 2 & 1 ---echo # are all accepted. 
+--echo # Test 5) KEY_BLOCK_SIZE validation -SET SESSION innodb_strict_mode = ON; +if ($INNODB_PAGE_SIZE == 4096) { +--error ER_CANT_CREATE_TABLE +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +--error ER_CANT_CREATE_TABLE +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; +} +if ($INNODB_PAGE_SIZE == 8192) { +--error ER_CANT_CREATE_TABLE +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +} +if ($INNODB_PAGE_SIZE == 16384) { CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; SHOW WARNINGS; SELECT table_name, row_format, create_options @@ -229,7 +432,7 @@ ALTER TABLE t1 KEY_BLOCK_SIZE=8; SHOW WARNINGS; SELECT table_name, row_format, create_options FROM information_schema.tables WHERE table_name = 't1'; - +} ALTER TABLE t1 KEY_BLOCK_SIZE=4; SHOW WARNINGS; SELECT table_name, row_format, create_options @@ -285,20 +488,20 @@ SELECT table_name, row_format, create_options DROP TABLE t1; ---echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 ---echo # are rejected when innodb_file_per_table=OFF +--echo # Test 6) KEY_BLOCK_SIZE with innodb_file_per_table=OFF # Moved from innodb-zip.test SET SESSION innodb_strict_mode = ON; SET GLOBAL innodb_file_per_table = OFF; SHOW VARIABLES LIKE 'innodb_file_per_table'; ---error ER_ILLEGAL_HA,1005 +--error ER_CANT_CREATE_TABLE CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; SHOW WARNINGS; ---error ER_ILLEGAL_HA,1005 +--error ER_CANT_CREATE_TABLE CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; SHOW WARNINGS; SET GLOBAL innodb_file_per_table = ON; +if ($INNODB_PAGE_SIZE == 16384) { --echo # Test 7) This series of tests were moved from 
innodb-index to here --echo # because the second alter table t1 assumes a 16k page size. --echo # Moving the test allows the rest of innodb-index to be run on all @@ -409,6 +612,7 @@ EXPLAIN SELECT * FROM t1 WHERE b LIKE 'adfd%'; # # end disabled45225_1 DROP TABLE t1; +} --echo # Test 8) Test creating a table that could lead to undo log overflow. CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, @@ -428,10 +632,12 @@ UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, # With this many indexes defined, we can still update all fields. CREATE INDEX t1a ON t1 (a(767)); +if ($INNODB_PAGE_SIZE == 16384) { CREATE INDEX t1b ON t1 (b(767)); CREATE INDEX t1c ON t1 (c(767)); CREATE INDEX t1d ON t1 (d(767)); CREATE INDEX t1e ON t1 (e(767)); +} UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; @@ -452,6 +658,7 @@ COMMIT; # More indexes can still be added and a single field can still be updated CREATE INDEX t1g ON t1 (g(767)); UPDATE t1 SET g=@e; +if ($INNODB_PAGE_SIZE == 16384) { CREATE INDEX t1h ON t1 (h(767)); UPDATE t1 SET h=@e; CREATE INDEX t1i ON t1 (i(767)); @@ -476,6 +683,7 @@ CREATE INDEX t1r ON t1 (r(767)); UPDATE t1 SET r=@e; CREATE INDEX t1s ON t1 (s(767)); UPDATE t1 SET s=@e; +} # Add one more index and we cannot update a column to its defined index length. # This is a problem. It means that the DDL is allowed to create a table @@ -486,133 +694,25 @@ UPDATE t1 SET t=@e; ROLLBACK; CREATE INDEX t1u ON t1 (u(767)); +if ($INNODB_PAGE_SIZE == 4096) +{ +CREATE INDEX t1ut ON t1 (u(767)); +CREATE INDEX t1st ON t1 (s(767)); +} +if ($INNODB_PAGE_SIZE != 4096) +{ CREATE INDEX t1ut ON t1 (u(767), t(767)); CREATE INDEX t1st ON t1 (s(767), t(767)); +} SHOW CREATE TABLE t1; DROP TABLE t1; ---echo # Bug #12429576 - Test an assertion failure on purge. -# This test is not in innodb_8k or innodb_4k since the bug is not about -# page size. 
It just tests the condition that caused the assertion. -CREATE TABLE t1_purge ( -A int, -B blob, C blob, D blob, E blob, -F blob, G blob, H blob, -PRIMARY KEY (B(767), C(767), D(767), E(767), A), -INDEX (A) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; - -INSERT INTO t1_purge VALUES (1, -REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), -REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); - -CREATE TABLE t2_purge ( -A int PRIMARY KEY, -B blob, C blob, D blob, E blob, -F blob, G blob, H blob, I blob, -J blob, K blob, L blob, -INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; - -INSERT INTO t2_purge VALUES (1, -REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), -REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), -REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); - -CREATE TABLE t3_purge ( -A int, -B varchar(800), C varchar(800), D varchar(800), E varchar(800), -F varchar(800), G varchar(800), H varchar(800), -PRIMARY KEY (B(767), C(767), D(767), E(767), A), -INDEX (A) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; - -INSERT INTO t3_purge SELECT * FROM t1_purge; - -CREATE TABLE t4_purge ( -A int PRIMARY KEY, -B varchar(800), C varchar(800), D varchar(800), E varchar(800), -F varchar(800), G varchar(800), H varchar(800), I varchar(800), -J varchar(800), K varchar(800), L varchar(800), -INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; - -INSERT INTO t4_purge SELECT * FROM t2_purge; - -# This would trigger the failure (Bug #12429576) -# if purge gets a chance to run before DROP TABLE t1_purge, .... -DELETE FROM t1_purge; -DELETE FROM t2_purge; -DELETE FROM t3_purge; -DELETE FROM t4_purge; -# We need to activate the purge thread. -# Instead of doing a --sleep 10 now, do it once at the end. - -# Bug#12637786 - Assertion hit; ut_ad(dict_index_is_clust(index)); -# A secondary index tuple is found to be too long to fit into a page. -# This test is not in innodb_8k or innodb_4k since the bug is not about -# page size. 
It just tests the condition that caused the assertion. -SET @r=REPEAT('a',500); -CREATE TABLE t12637786(a int, - v1 varchar(500), v2 varchar(500), v3 varchar(500), - v4 varchar(500), v5 varchar(500), v6 varchar(500), - v7 varchar(500), v8 varchar(500), v9 varchar(500), - v10 varchar(500), v11 varchar(500), v12 varchar(500), - v13 varchar(500), v14 varchar(500), v15 varchar(500), - v16 varchar(500), v17 varchar(500), v18 varchar(500) -) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; -CREATE INDEX idx1 ON t12637786(a,v1); -INSERT INTO t12637786 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); -UPDATE t12637786 SET a=1000; -DELETE FROM t12637786; -# We need to activate the purge thread to make sure it does not assert and -# is able to clean up the old versions of secondary index entries. -# Instead of doing a --sleep 10 now for each test, do it once at the end. - ---echo # Bug#12963823 - Test that the purge thread does not crash when -# the number of indexes has changed since the UNDO record was logged. -# This test is not in innodb_8k or innodb_4k since the bug is not about -# page size. It just tests the condition that caused the crash. 
-CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, - i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) - ENGINE=innodb ROW_FORMAT=dynamic; -SET @r = REPEAT('a', 767); -INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); -CREATE INDEX ndx_a ON t12963823 (a(500)); -CREATE INDEX ndx_b ON t12963823 (b(500)); -CREATE INDEX ndx_c ON t12963823 (c(500)); -CREATE INDEX ndx_d ON t12963823 (d(500)); -CREATE INDEX ndx_e ON t12963823 (e(500)); -CREATE INDEX ndx_f ON t12963823 (f(500)); -CREATE INDEX ndx_k ON t12963823 (k(500)); -CREATE INDEX ndx_l ON t12963823 (l(500)); - -SET @r = REPEAT('b', 500); -UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; -UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; -UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; -UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; -ALTER TABLE t12963823 DROP INDEX ndx_a; -ALTER TABLE t12963823 DROP INDEX ndx_b; -CREATE INDEX ndx_g ON t12963823 (g(500)); -CREATE INDEX ndx_h ON t12963823 (h(500)); -CREATE INDEX ndx_i ON t12963823 (i(500)); -CREATE INDEX ndx_j ON t12963823 (j(500)); -CREATE INDEX ndx_m ON t12963823 (m(500)); -CREATE INDEX ndx_n ON t12963823 (n(500)); -CREATE INDEX ndx_o ON t12963823 (o(500)); -CREATE INDEX ndx_p ON t12963823 (p(500)); -SHOW CREATE TABLE t12963823; -# We need to activate the purge thread at this point to see if it crashes. -# Instead of doing a --sleep 10 now for each test, do it once at the end. - --echo # Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE # InnoDB cannot know that this undo record would be too big for the undo # page. Too much of text field is stored in the clustered record in this # DYNAMIC row formatted record. -# This test is not in innodb_8k or innodb_4k since the bug is not about -# page size. It just tests the condition that caused the hang. 
-SET SESSION innodb_strict_mode = ON; CREATE TABLE bug12547647( a int NOT NULL, b blob NOT NULL, c text, PRIMARY KEY (b(10), a), INDEX (c(767)), INDEX(b(767)) @@ -626,9 +726,127 @@ SHOW WARNINGS; ROLLBACK; DROP TABLE bug12547647; +# +# Bug #13336585 - INNODB: CHANGE BUFFERING WITH 4K PAGES CAN ASSERT +# IF SECONDARY KEY IS NEAR MAX +# If the secondary index tuple is close to half the page size, +# ibuf_insert_low() could return DB_TOO_BIG_RECORD, which is not expected +# in ibuf_insert(). In order to ensure this does not happen, WL5756 +# imposes a maximum key length of 768 for 4k pages and 1536 for 8k pages. +# The existing max key size for 16k pages is 3072. +# + +#-- disable_query_log +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. +# This flag is not used currently since it exposes valgrind error in ibuf +# code with the following SQL +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 1; +#-- enable_query_log + +# make sure the largest possible key entry can be added to the insert buffer. +# Make enough records so that the root page is not a leaf page. 
+SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( + pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), + pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), + pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), + pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), + sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), + sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), + sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), + sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +# Compressed tables do not compress parent pages. So the whole uncompressed +# secondary tuple including the primary key must be able to fit in half the +# compressed page size. This record length is enforced at index creation. 
+# So the only way to get an ibuf tuple too big is to make the KEY_BLOCK_SIZE +# the same as the page size. +CREATE TABLE t1( + pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), + pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), + pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), + pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), + sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), + sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), + sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), + sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +#-- disable_query_log +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 0; +#-- enable_query_log + # The following should fail in non-strict mode too. 
# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) -SET SESSION innodb_strict_mode = off; +if ($INNODB_PAGE_SIZE == 4096) +{ +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +} +if ($INNODB_PAGE_SIZE != 4096) +{ CREATE TABLE t1( c text NOT NULL, d text NOT NULL, PRIMARY KEY (c(767),d(767))) @@ -643,7 +861,8 @@ CREATE TABLE t1( c text NOT NULL, d text NOT NULL, PRIMARY KEY (c(767),d(767))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; -drop table t1; +} +DROP TABLE t1; CREATE TABLE t1(c text, PRIMARY KEY (c(440))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; DROP TABLE t1; @@ -651,62 +870,3 @@ CREATE TABLE t1(c text, PRIMARY KEY (c(438))) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); DROP TABLE t1; - - ---echo # ---echo # Bug#56862 Execution of a query that uses index merge returns a wrong result ---echo # - -# Moved to here from innodb_mysql.test. Some PB3 systems sporadically -# had timeouts doing this with smaller page sizes. 
- -CREATE TABLE t1 ( - pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, - a int, - b int, - INDEX idx(a)) -ENGINE=INNODB; - -INSERT INTO t1(a,b) VALUES - (11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), - (3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), - (6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), - (13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); -INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; -INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; -INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1(a,b) SELECT a,b FROM t1; -INSERT INTO t1 VALUES (1000000, 0, 0); - -set @optimizer_switch_saved=@@optimizer_switch; -SET SESSION optimizer_switch='derived_merge=off'; -SET SESSION sort_buffer_size = 1024*36; - -EXPLAIN -SELECT COUNT(*) FROM - (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) - WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; - -SELECT COUNT(*) FROM - (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) - WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; - -set @@optimizer_switch=@optimizer_switch_saved; -SET SESSION sort_buffer_size = DEFAULT; - -DROP TABLE t1; - - -# The tests that uses these tables required the purge thread to run. -# Just in case it has not by now, provide a 10 second wait. 
---sleep 10 -DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge; -DROP TABLE t12637786; -DROP TABLE t12963823; diff --git a/mysql-test/suite/maria/alter.result b/mysql-test/suite/maria/alter.result index ea72328a38f..849aa6803f1 100644 --- a/mysql-test/suite/maria/alter.result +++ b/mysql-test/suite/maria/alter.result @@ -90,3 +90,17 @@ check table t2; Table Op Msg_type Msg_text test.t2 check status OK DROP TABLE t1,t2; +# +# MDEV-17576 +# Assertion `share->reopen == 1' failed in maria_extra upon ALTER on +# Aria table with triggers and locks +# +CREATE TABLE t1 (a INT) ENGINE=Aria; +CREATE TRIGGER tr BEFORE INSERT ON t1 FOR EACH ROW INSERT INTO t1 SELECT * FROM t1; +LOCK TABLE t1 WRITE; +ALTER TABLE t1 FORCE, LOCK=EXCLUSIVE; +DROP TRIGGER tr; +DROP TABLE t1; +# +# End of 10.2 test +# diff --git a/mysql-test/suite/maria/alter.test b/mysql-test/suite/maria/alter.test index ef21ab2e5d2..31eeac1df87 100644 --- a/mysql-test/suite/maria/alter.test +++ b/mysql-test/suite/maria/alter.test @@ -11,7 +11,7 @@ drop table if exists t1; CREATE TABLE t1 (pk INT, d DATETIME, PRIMARY KEY(pk), KEY(d)) ENGINE=Aria; ALTER TABLE t1 DISABLE KEYS; INSERT INTO t1 VALUES (1,'2000-01-01 22:22:22'),(2,'2012-12-21 12:12:12'); -INSERT INTO t1 VALUES (3, '2008-07-24'); +INSERT INTO t1 VALUES (3, '2008-07-24'); ALTER TABLE t1 ENABLE KEYS; SELECT t1a.pk FROM t1 AS t1a LEFT JOIN t1 AS t1b ON t1a.pk = t1b.pk; @@ -74,6 +74,19 @@ check table t1; check table t2; DROP TABLE t1,t2; -# -# End of 10.2 tests -# +--echo # +--echo # MDEV-17576 +--echo # Assertion `share->reopen == 1' failed in maria_extra upon ALTER on +--echo # Aria table with triggers and locks +--echo # + +CREATE TABLE t1 (a INT) ENGINE=Aria; +CREATE TRIGGER tr BEFORE INSERT ON t1 FOR EACH ROW INSERT INTO t1 SELECT * FROM t1; +LOCK TABLE t1 WRITE; +ALTER TABLE t1 FORCE, LOCK=EXCLUSIVE; +DROP TRIGGER tr; +DROP TABLE t1; + +--echo # +--echo # End of 10.2 test +--echo # diff --git a/mysql-test/suite/maria/kill.result 
b/mysql-test/suite/maria/kill.result new file mode 100644 index 00000000000..5e75e75a41a --- /dev/null +++ b/mysql-test/suite/maria/kill.result @@ -0,0 +1,13 @@ +# +# MDEV-14996 +# Assertion `!thd->get_stmt_da()->is_sent() || +# thd->killed == KILL_CONNECTION' failed in ha_maria::external_lock +# +CREATE TABLE t1 (a INT) ENGINE=Aria; +connect con1,localhost,root,,; +FLUSH TABLE t1 FOR EXPORT; +KILL CONNECTION_ID(); +ERROR 70100: Connection was killed +disconnect con1; +connection default; +DROP TABLE t1; diff --git a/mysql-test/suite/maria/kill.test b/mysql-test/suite/maria/kill.test new file mode 100644 index 00000000000..98a588a6698 --- /dev/null +++ b/mysql-test/suite/maria/kill.test @@ -0,0 +1,14 @@ +--echo # +--echo # MDEV-14996 +--echo # Assertion `!thd->get_stmt_da()->is_sent() || +--echo # thd->killed == KILL_CONNECTION' failed in ha_maria::external_lock +--echo # + +CREATE TABLE t1 (a INT) ENGINE=Aria; +--connect (con1,localhost,root,,) +FLUSH TABLE t1 FOR EXPORT; +--error ER_CONNECTION_KILLED +KILL CONNECTION_ID(); +--disconnect con1 +--connection default +DROP TABLE t1; diff --git a/mysql-test/suite/mariabackup/missing_ibd.result b/mysql-test/suite/mariabackup/missing_ibd.result index 53989be7c14..7ff63b7d434 100644 --- a/mysql-test/suite/mariabackup/missing_ibd.result +++ b/mysql-test/suite/mariabackup/missing_ibd.result @@ -2,5 +2,5 @@ create table t1(c1 int) engine=InnoDB; INSERT INTO t1 VALUES(1); # xtrabackup backup select * from t1; -ERROR 42S02: Table 'test.t1' doesn't exist in engine +ERROR HY000: Got error 194 "Tablespace is missing for a table" from storage engine InnoDB drop table t1; diff --git a/mysql-test/suite/mariabackup/missing_ibd.test b/mysql-test/suite/mariabackup/missing_ibd.test index 22044e59841..ce22616e25d 100644 --- a/mysql-test/suite/mariabackup/missing_ibd.test +++ b/mysql-test/suite/mariabackup/missing_ibd.test @@ -30,7 +30,6 @@ exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir rmdir $targetdir; 
---error ER_NO_SUCH_TABLE_IN_ENGINE +--error ER_GET_ERRNO select * from t1; drop table t1; - diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 22105923764..b2a43e00742 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -876,6 +876,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON,FORCE READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_ENCRYPT_TEMPORARY_TABLES +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enrypt the temporary table data. +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_FAST_SHUTDOWN SESSION_VALUE NULL GLOBAL_VALUE 1 diff --git a/plugin/disks/information_schema_disks.cc b/plugin/disks/information_schema_disks.cc index 5bd7e0e46b7..9e0eb95bd97 100644 --- a/plugin/disks/information_schema_disks.cc +++ b/plugin/disks/information_schema_disks.cc @@ -20,6 +20,7 @@ #include #include #include +#include /* check_global_access() */ bool schema_table_store_record(THD *thd, TABLE *table); @@ -84,6 +85,9 @@ int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond) int rv = 1; TABLE* pTable = pTables->table; + if (check_global_access(pThd, FILE_ACL, true)) + return 0; + FILE* pFile = setmntent("/etc/mtab", "r"); if (pFile) @@ -145,11 +149,11 @@ maria_declare_plugin(disks) PLUGIN_LICENSE_GPL, /* license type */ disks_table_init, /* init function */ NULL, /* deinit function */ - 0x0100, /* version = 1.0 */ + 0x0101, /* version = 1.1 */ NULL, /* no status variables */ NULL, /* no system variables */ - "1.0", /* String version representation */ - MariaDB_PLUGIN_MATURITY_BETA /* Maturity (see include/mysql/plugin.h)*/ + "1.1", /* String version representation */ + 
MariaDB_PLUGIN_MATURITY_STABLE /* Maturity (see include/mysql/plugin.h)*/ } mysql_declare_plugin_end; diff --git a/plugin/disks/mysql-test/disks/disks_notembedded.result b/plugin/disks/mysql-test/disks/disks_notembedded.result new file mode 100644 index 00000000000..974294744cc --- /dev/null +++ b/plugin/disks/mysql-test/disks/disks_notembedded.result @@ -0,0 +1,22 @@ +# +# MDEV-18328: Make DISKS plugin check some privilege to access +# information_schema.DISKS table +# +CREATE USER user1@localhost; +GRANT SELECT ON *.* TO user1@localhost; +connect con1,localhost,user1,,; +connection con1; +select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks; +sum(Total) > sum(Available) sum(Total)>sum(Used) +NULL NULL +disconnect con1; +connection default; +GRANT FILE ON *.* TO user1@localhost; +connect con1,localhost,user1,,; +connection con1; +select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks; +sum(Total) > sum(Available) sum(Total)>sum(Used) +1 1 +connection default; +DROP USER user1@localhost; +# End of 10.1 tests diff --git a/plugin/disks/mysql-test/disks/disks_notembedded.test b/plugin/disks/mysql-test/disks/disks_notembedded.test new file mode 100644 index 00000000000..a0f6c2e5887 --- /dev/null +++ b/plugin/disks/mysql-test/disks/disks_notembedded.test @@ -0,0 +1,25 @@ +source include/not_embedded.inc; + +--echo # +--echo # MDEV-18328: Make DISKS plugin check some privilege to access +--echo # information_schema.DISKS table +--echo # + +CREATE USER user1@localhost; +GRANT SELECT ON *.* TO user1@localhost; + +connect (con1,localhost,user1,,); +connection con1; +select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks; +disconnect con1; + +connection default; +GRANT FILE ON *.* TO user1@localhost; + +connect (con1,localhost,user1,,); +connection con1; +select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks; +connection default; +DROP USER 
user1@localhost; + +--echo # End of 10.1 tests diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index a2299812c05..464b434b3ab 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -15,7 +15,7 @@ #define PLUGIN_VERSION 0x104 -#define PLUGIN_STR_VERSION "1.4.6" +#define PLUGIN_STR_VERSION "1.4.7" #define _my_thread_var loc_thread_var @@ -290,7 +290,7 @@ static unsigned long long file_rotate_size; static unsigned int rotations; static my_bool rotate= TRUE; static char logging; -static int internal_stop_logging= 0; +static volatile int internal_stop_logging= 0; static char incl_user_buffer[1024]; static char excl_user_buffer[1024]; static char *big_buffer= NULL; @@ -536,16 +536,20 @@ static struct st_mysql_show_var audit_status[]= #if defined(HAVE_PSI_INTERFACE) && !defined(FLOGGER_NO_PSI) /* These belong to the service initialization */ static PSI_mutex_key key_LOCK_operations; +static PSI_mutex_key key_LOCK_atomic; static PSI_mutex_key key_LOCK_bigbuffer; static PSI_mutex_info mutex_key_list[]= { { &key_LOCK_operations, "SERVER_AUDIT_plugin::lock_operations", PSI_FLAG_GLOBAL}, + { &key_LOCK_atomic, "SERVER_AUDIT_plugin::lock_atomic", + PSI_FLAG_GLOBAL}, { &key_LOCK_bigbuffer, "SERVER_AUDIT_plugin::lock_bigbuffer", PSI_FLAG_GLOBAL} }; #endif static mysql_mutex_t lock_operations; +static mysql_mutex_t lock_atomic; static mysql_mutex_t lock_bigbuffer; /* The Percona server and partly MySQL don't support */ @@ -556,6 +560,14 @@ static mysql_mutex_t lock_bigbuffer; /* worths doing. 
*/ #define CLIENT_ERROR if (!started_mysql) my_printf_error +#define ADD_ATOMIC(x, a) \ + do { \ + flogger_mutex_lock(&lock_atomic); \ + x+= a; \ + flogger_mutex_unlock(&lock_atomic); \ + } while (0) + + static uchar *getkey_user(const char *entry, size_t *length, my_bool nu __attribute__((unused)) ) { @@ -734,20 +746,20 @@ static int user_coll_fill(struct user_coll *c, char *users, if (cmp_user && take_over_cmp) { - internal_stop_logging= 1; + ADD_ATOMIC(internal_stop_logging, 1); CLIENT_ERROR(1, "User '%.*s' was removed from the" " server_audit_excl_users.", MYF(ME_JUST_WARNING), (int) cmp_length, users); - internal_stop_logging= 0; + ADD_ATOMIC(internal_stop_logging, -1); blank_user(cmp_user); refill_cmp_coll= 1; } else if (cmp_user) { - internal_stop_logging= 1; + ADD_ATOMIC(internal_stop_logging, 1); CLIENT_ERROR(1, "User '%.*s' is in the server_audit_incl_users, " "so wasn't added.", MYF(ME_JUST_WARNING), (int) cmp_length, users); - internal_stop_logging= 0; + ADD_ATOMIC(internal_stop_logging, -1); remove_user(users); continue; } @@ -1255,23 +1267,30 @@ static void change_connection(struct connection_info *cn, event->ip, event->ip_length); } -static int write_log(const char *message, size_t len) +static int write_log(const char *message, size_t len, int take_lock) { + int result= 0; + if (take_lock) + flogger_mutex_lock(&lock_operations); + if (output_type == OUTPUT_FILE) { if (logfile && - (is_active= (logger_write(logfile, message, len) == (int)len))) - return 0; + (is_active= (logger_write(logfile, message, len) == (int) len))) + goto exit; ++log_write_failures; - return 1; + result= 1; } else if (output_type == OUTPUT_SYSLOG) { syslog(syslog_facility_codes[syslog_facility] | syslog_priority_codes[syslog_priority], - "%s %.*s", syslog_info, (int)len, message); + "%s %.*s", syslog_info, (int) len, message); } - return 0; +exit: + if (take_lock) + flogger_mutex_unlock(&lock_operations); + return result; } @@ -1330,7 +1349,7 @@ static int log_connection(const 
struct connection_info *cn, csize+= my_snprintf(message+csize, sizeof(message) - 1 - csize, ",%.*s,,%d", cn->db_length, cn->db, event->status); message[csize]= '\n'; - return write_log(message, csize + 1); + return write_log(message, csize + 1, 1); } @@ -1351,7 +1370,7 @@ static int log_connection_event(const struct mysql_event_connection *event, csize+= my_snprintf(message+csize, sizeof(message) - 1 - csize, ",%.*s,,%d", event->database.length, event->database.str, event->status); message[csize]= '\n'; - return write_log(message, csize + 1); + return write_log(message, csize + 1, 1); } @@ -1480,21 +1499,28 @@ no_password: -static int do_log_user(const char *name) +static int do_log_user(const char *name, int take_lock) { size_t len; + int result; if (!name) return 0; len= strlen(name); + if (take_lock) + flogger_mutex_lock(&lock_operations); + if (incl_user_coll.n_users) - return coll_search(&incl_user_coll, name, len) != 0; + result= coll_search(&incl_user_coll, name, len) != 0; + else if (excl_user_coll.n_users) + result= coll_search(&excl_user_coll, name, len) == 0; + else + result= 1; - if (excl_user_coll.n_users) - return coll_search(&excl_user_coll, name, len) == 0; - - return 1; + if (take_lock) + flogger_mutex_unlock(&lock_operations); + return result; } @@ -1591,7 +1617,7 @@ not_in_list: static int log_statement_ex(const struct connection_info *cn, time_t ev_time, unsigned long thd_id, const char *query, unsigned int query_len, - int error_code, const char *type) + int error_code, const char *type, int take_lock) { size_t csize; char message_loc[1024]; @@ -1739,7 +1765,7 @@ do_log_query: csize+= my_snprintf(message+csize, message_size - 1 - csize, "\',%d", error_code); message[csize]= '\n'; - result= write_log(message, csize + 1); + result= write_log(message, csize + 1, take_lock); if (message == big_buffer) flogger_mutex_unlock(&lock_bigbuffer); @@ -1753,7 +1779,7 @@ static int log_statement(const struct connection_info *cn, { return log_statement_ex(cn, 
event->general_time, event->general_thread_id, event->general_query, event->general_query_length, - event->general_error_code, type); + event->general_error_code, type, 1); } @@ -1775,7 +1801,7 @@ static int log_table(const struct connection_info *cn, ",%.*s,%.*s,",event->database.length, event->database.str, event->table.length, event->table.str); message[csize]= '\n'; - return write_log(message, csize + 1); + return write_log(message, csize + 1, 1); } @@ -1799,7 +1825,7 @@ static int log_rename(const struct connection_info *cn, event->new_database.length, event->new_database.str, event->new_table.length, event->new_table.str); message[csize]= '\n'; - return write_log(message, csize + 1); + return write_log(message, csize + 1, 1); } @@ -1991,8 +2017,6 @@ void auditing(MYSQL_THD thd, unsigned int event_class, const void *ev) if (!thd || internal_stop_logging) return; - flogger_mutex_lock(&lock_operations); - if (maria_55_started && debug_server_started && event_class == MYSQL_AUDIT_GENERAL_CLASS) { @@ -2031,7 +2055,7 @@ void auditing(MYSQL_THD thd, unsigned int event_class, const void *ev) } if (event_class == MYSQL_AUDIT_GENERAL_CLASS && FILTER(EVENT_QUERY) && - cn && (cn->log_always || do_log_user(cn->user))) + cn && (cn->log_always || do_log_user(cn->user, 1))) { const struct mysql_event_general *event = (const struct mysql_event_general *) ev; @@ -2051,7 +2075,7 @@ void auditing(MYSQL_THD thd, unsigned int event_class, const void *ev) { const struct mysql_event_table *event = (const struct mysql_event_table *) ev; - if (do_log_user(event->user)) + if (do_log_user(event->user, 1)) { switch (event->event_subclass) { @@ -2115,7 +2139,6 @@ exit_func: break; } } - flogger_mutex_unlock(&lock_operations); } @@ -2384,6 +2407,7 @@ static int server_audit_init(void *p __attribute__((unused))) PSI_server->register_mutex("server_audit", mutex_key_list, 1); #endif flogger_mutex_init(key_LOCK_operations, &lock_operations, MY_MUTEX_INIT_FAST); + 
flogger_mutex_init(key_LOCK_operations, &lock_atomic, MY_MUTEX_INIT_FAST); flogger_mutex_init(key_LOCK_operations, &lock_bigbuffer, MY_MUTEX_INIT_FAST); coll_init(&incl_user_coll); @@ -2471,6 +2495,7 @@ static int server_audit_deinit(void *p __attribute__((unused))) (void) free(big_buffer); flogger_mutex_destroy(&lock_operations); + flogger_mutex_destroy(&lock_atomic); flogger_mutex_destroy(&lock_bigbuffer); error_header(); @@ -2563,7 +2588,7 @@ static void log_current_query(MYSQL_THD thd) { cn->log_always= 1; log_statement_ex(cn, cn->query_time, thd_get_thread_id(thd), - cn->query, cn->query_length, 0, "QUERY"); + cn->query, cn->query_length, 0, "QUERY", 0); cn->log_always= 0; } } @@ -2575,12 +2600,13 @@ static void update_file_path(MYSQL_THD thd, { char *new_name= (*(char **) save) ? *(char **) save : empty_str; - if (!maria_55_started || !debug_server_started) - flogger_mutex_lock(&lock_operations); - internal_stop_logging= 1; + ADD_ATOMIC(internal_stop_logging, 1); error_header(); fprintf(stderr, "Log file name was changed to '%s'.\n", new_name); + if (!maria_55_started || !debug_server_started) + flogger_mutex_lock(&lock_operations); + if (logging) log_current_query(thd); @@ -2589,7 +2615,6 @@ static void update_file_path(MYSQL_THD thd, char *sav_path= file_path; file_path= new_name; - internal_stop_logging= 1; stop_logging(); if (start_logging()) { @@ -2605,16 +2630,15 @@ static void update_file_path(MYSQL_THD thd, } goto exit_func; } - internal_stop_logging= 0; } strncpy(path_buffer, new_name, sizeof(path_buffer)-1); path_buffer[sizeof(path_buffer)-1]= 0; file_path= path_buffer; exit_func: - internal_stop_logging= 0; if (!maria_55_started || !debug_server_started) flogger_mutex_unlock(&lock_operations); + ADD_ATOMIC(internal_stop_logging, -1); } @@ -2745,8 +2769,8 @@ static void update_output_type(MYSQL_THD thd, if (output_type == new_output_type) return; + ADD_ATOMIC(internal_stop_logging, 1); flogger_mutex_lock(&lock_operations); - internal_stop_logging= 
1; if (logging) { log_current_query(thd); @@ -2760,8 +2784,8 @@ static void update_output_type(MYSQL_THD thd, if (logging) start_logging(); - internal_stop_logging= 0; flogger_mutex_unlock(&lock_operations); + ADD_ATOMIC(internal_stop_logging, -1); } @@ -2809,9 +2833,9 @@ static void update_logging(MYSQL_THD thd, if (new_logging == logging) return; + ADD_ATOMIC(internal_stop_logging, 1); if (!maria_55_started || !debug_server_started) flogger_mutex_lock(&lock_operations); - internal_stop_logging= 1; if ((logging= new_logging)) { start_logging(); @@ -2827,9 +2851,9 @@ static void update_logging(MYSQL_THD thd, stop_logging(); } - internal_stop_logging= 0; if (!maria_55_started || !debug_server_started) flogger_mutex_unlock(&lock_operations); + ADD_ATOMIC(internal_stop_logging, -1); } @@ -2841,16 +2865,16 @@ static void update_mode(MYSQL_THD thd __attribute__((unused)), if (mode_readonly || new_mode == mode) return; + ADD_ATOMIC(internal_stop_logging, 1); if (!maria_55_started || !debug_server_started) flogger_mutex_lock(&lock_operations); - internal_stop_logging= 1; mark_always_logged(thd); error_header(); fprintf(stderr, "Logging mode was changed from %d to %d.\n", mode, new_mode); mode= new_mode; - internal_stop_logging= 0; if (!maria_55_started || !debug_server_started) flogger_mutex_unlock(&lock_operations); + ADD_ATOMIC(internal_stop_logging, -1); } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 7d99cbdb936..21f4a5ba1e2 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -286,6 +286,7 @@ public: virtual const char* func_name() const { return "isnottrue"; } Item *get_copy(THD *thd) { return get_item_copy(thd, this); } + bool eval_not_null_tables(void *) { not_null_tables_cache= 0; return false; } }; @@ -317,6 +318,7 @@ public: virtual const char* func_name() const { return "isnotfalse"; } Item *get_copy(THD *thd) { return get_item_copy(thd, this); } + bool eval_not_null_tables(void *) { not_null_tables_cache= 0; return false; } }; diff --git 
a/sql/protocol.cc b/sql/protocol.cc index 84ca4585a12..02c95a42695 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -461,6 +461,17 @@ bool net_send_error_packet(THD *thd, uint sql_errno, const char *err, */ if ((save_compress= net->compress)) net->compress= 2; + + /* + Sometimes, we send errors "out-of-band", e.g. ER_CONNECTION_KILLED + on an idle connection. The current protocol "sequence number" is 0, + however some client drivers would always expect packets + coming from server to have seq_no > 0, due to missing awareness + of "out-of-band" operations. Make these clients happy. + */ + if (!net->pkt_nr) + net->pkt_nr= 1; + ret= net_write_command(net,(uchar) 255, (uchar*) "", 0, (uchar*) buff, length); net->compress= save_compress; diff --git a/sql/sql_audit.cc b/sql/sql_audit.cc index e8a00abf30b..ed175ae4865 100644 --- a/sql/sql_audit.cc +++ b/sql/sql_audit.cc @@ -119,11 +119,26 @@ void mysql_audit_acquire_plugins(THD *thd, ulong *event_class_mask) { plugin_foreach(thd, acquire_plugins, MYSQL_AUDIT_PLUGIN, event_class_mask); add_audit_mask(thd->audit_class_mask, event_class_mask); + thd->audit_plugin_version= global_plugin_version; } DBUG_VOID_RETURN; } +/** + Check if there were changes in the state of plugins + so we need to do the mysql_audit_release asap. + + @param[in] thd + +*/ + +my_bool mysql_audit_release_required(THD *thd) +{ + return thd && (thd->audit_plugin_version != global_plugin_version); +} + + /** Release any resources associated with the current thd. 
@@ -159,6 +174,7 @@ void mysql_audit_release(THD *thd) /* Reset the state of thread values */ reset_dynamic(&thd->audit_class_plugins); bzero(thd->audit_class_mask, sizeof(thd->audit_class_mask)); + thd->audit_plugin_version= -1; } diff --git a/sql/sql_audit.h b/sql/sql_audit.h index 327fe6052ab..59cced13b0a 100644 --- a/sql/sql_audit.h +++ b/sql/sql_audit.h @@ -58,6 +58,7 @@ static inline void mysql_audit_notify(THD *thd, uint event_class, #define mysql_audit_connection_enabled() 0 #define mysql_audit_table_enabled() 0 #endif +extern my_bool mysql_audit_release_required(THD *thd); extern void mysql_audit_release(THD *thd); static inline unsigned int strlen_uint(const char *s) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 32fd2cf5edd..455c5fd47b5 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -629,6 +629,9 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) waiting_on_group_commit(FALSE), has_waiter(FALSE), spcont(NULL), m_parser_state(NULL), +#ifndef EMBEDDED_LIBRARY + audit_plugin_version(-1), +#endif #if defined(ENABLED_DEBUG_SYNC) debug_sync_control(0), #endif /* defined(ENABLED_DEBUG_SYNC) */ diff --git a/sql/sql_class.h b/sql/sql_class.h index fa2fadd598e..acf699f72e3 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3255,6 +3255,7 @@ public: added to the list of audit plugins which are currently in use. 
*/ unsigned long audit_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE]; + int audit_plugin_version; #endif #if defined(ENABLED_DEBUG_SYNC) diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 17aba33a76e..826951f7c9d 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -1398,7 +1398,8 @@ void do_handle_one_connection(CONNECT *connect) while (thd_is_connection_alive(thd)) { - mysql_audit_release(thd); + if (mysql_audit_release_required(thd)) + mysql_audit_release(thd); if (do_command(thd)) break; } diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 7ebc0781a7c..b568c5cb229 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -227,6 +227,7 @@ static DYNAMIC_ARRAY plugin_array; static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM]; static MEM_ROOT plugin_mem_root; static bool reap_needed= false; +volatile int global_plugin_version= 1; static bool initialized= 0; ulong dlopen_count; @@ -2217,6 +2218,7 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, reap_plugins(); } err: + global_plugin_version++; mysql_mutex_unlock(&LOCK_plugin); if (argv) free_defaults(argv); @@ -2364,6 +2366,7 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, } reap_plugins(); + global_plugin_version++; mysql_mutex_unlock(&LOCK_plugin); DBUG_RETURN(error); diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h index b60fe0f13eb..352d1e22ac4 100644 --- a/sql/sql_plugin.h +++ b/sql/sql_plugin.h @@ -38,6 +38,7 @@ enum enum_plugin_load_option { PLUGIN_OFF, PLUGIN_ON, PLUGIN_FORCE, PLUGIN_FORCE_PLUS_PERMANENT }; extern const char *global_plugin_typelib_names[]; +extern volatile int global_plugin_version; extern ulong dlopen_count; #include diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index 75b924a7569..6a4139197f9 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -353,7 +353,8 @@ static int threadpool_process_request(THD *thd) { Vio *vio; thd->net.reading_or_writing= 0; - mysql_audit_release(thd); + if 
(mysql_audit_release_required(thd)) + mysql_audit_release(thd); if ((retval= do_command(thd)) != 0) goto end; diff --git a/storage/innobase/.clang-format b/storage/innobase/.clang-format deleted file mode 100644 index f7a72f3cf24..00000000000 --- a/storage/innobase/.clang-format +++ /dev/null @@ -1,10 +0,0 @@ -UseTab: Always -TabWidth: 8 -IndentWidth: 8 -BreakBeforeBinaryOperators: All -PointerAlignment: Left -AlwaysBreakAfterReturnType: TopLevel -BreakBeforeBraces: Custom -BraceWrapping: - AfterFunction: true -AccessModifierOffset: -8 diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 99794144a04..971615f96e6 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -4693,7 +4693,7 @@ btr_cur_pessimistic_update( ut_ad(dict_index_is_clust(index)); ut_ad(thr_get_trx(thr)->in_rollback); - DBUG_EXECUTE_IF("ib_blob_update_rollback", DBUG_SUICIDE();); + DEBUG_SYNC_C("blob_rollback_middle"); btr_rec_free_updated_extern_fields( index, rec, page_zip, *offsets, update, true, mtr); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 2a7ce5235ae..3a2ebed4878 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -60,6 +60,7 @@ Created 11/5/1995 Heikki Tuuri #include "dict0dict.h" #include "log0recv.h" #include "srv0mon.h" +#include "log0crypt.h" #endif /* !UNIV_INNOCHECKSUM */ #include "page0zip.h" #include "sync0sync.h" @@ -476,6 +477,45 @@ buf_pool_register_chunk( chunk->blocks->frame, chunk)); } +/** Decrypt a page for temporary tablespace. 
+@param[in,out] tmp_frame Temporary buffer +@param[in] src_frame Page to decrypt +@return true if temporary tablespace decrypted, false if not */ +static bool buf_tmp_page_decrypt(byte* tmp_frame, byte* src_frame) +{ + if (buf_page_is_zeroes(src_frame, srv_page_size)) { + return true; + } + + /* read space & lsn */ + uint header_len = FIL_PAGE_DATA; + + /* Copy FIL page header, it is not encrypted */ + memcpy(tmp_frame, src_frame, header_len); + + /* Calculate the offset where decryption starts */ + const byte* src = src_frame + header_len; + byte* dst = tmp_frame + header_len; + uint srclen = uint(srv_page_size) + - header_len - FIL_PAGE_DATA_END; + ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET); + + if (!log_tmp_block_decrypt(src, srclen, dst, + (offset * srv_page_size))) { + return false; + } + + memcpy(tmp_frame + srv_page_size - FIL_PAGE_DATA_END, + src_frame + srv_page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + memcpy(src_frame, tmp_frame, srv_page_size); + srv_stats.pages_decrypted.inc(); + srv_stats.n_temp_blocks_decrypted.inc(); + + return true; /* page was decrypted */ +} + /** Decrypt a page. @param[in,out] bpage Page control block @param[in,out] space tablespace @@ -495,6 +535,22 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) return (true); } + if (space->purpose == FIL_TYPE_TEMPORARY + && innodb_encrypt_temporary_tables) { + buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool); + buf_tmp_reserve_crypt_buf(slot); + + if (!buf_tmp_page_decrypt(slot->crypt_buf, dst_frame)) { + slot->release(); + ib::error() << "Encrypted page " << bpage->id + << " in file " << space->chain.start->name; + return false; + } + + slot->release(); + return true; + } + /* Page is encrypted if encryption information is found from tablespace and page contains used key_version. This is true also for pages first compressed and then encrypted. 
*/ @@ -921,6 +977,22 @@ static uint32_t buf_page_check_crc32(const byte* page, uint32_t checksum) # define buf_page_check_crc32(page, checksum) buf_calc_page_crc32(page) #endif /* INNODB_BUG_ENDIAN_CRC32 */ +/** Check if a page is all zeroes. +@param[in] read_buf database page +@param[in] page_size page frame size +@return whether the page is all zeroes */ +bool buf_page_is_zeroes(const void* read_buf, size_t page_size) +{ + const ulint* b = reinterpret_cast(read_buf); + const ulint* const e = b + page_size / sizeof *b; + do { + if (*b++) { + return false; + } + } while (b != e); + return true; +} + /** Check if a page is corrupt. @param[in] check_lsn whether the LSN should be checked @param[in] read_buf database page @@ -1247,11 +1319,13 @@ buf_page_print(const byte* read_buf, const page_size_t& page_size) { dict_index_t* index; +#ifndef UNIV_DEBUG ib::info() << "Page dump in ascii and hex (" << page_size.physical() << " bytes):"; ut_print_buf(stderr, read_buf, page_size.physical()); fputs("\nInnoDB: End of page dump\n", stderr); +#endif if (page_size.is_compressed()) { /* Print compressed page. 
*/ @@ -7326,6 +7400,44 @@ operator<<( return(out); } +/** Encrypt a buffer of temporary tablespace +@param[in] offset Page offset +@param[in] src_frame Page to encrypt +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ +static byte* buf_tmp_page_encrypt( + ulint offset, + byte* src_frame, + byte* dst_frame) +{ + uint header_len = FIL_PAGE_DATA; + /* FIL page header is not encrypted */ + memcpy(dst_frame, src_frame, header_len); + + /* Calculate the start offset in a page */ + uint unencrypted_bytes = header_len + FIL_PAGE_DATA_END; + uint srclen = srv_page_size - unencrypted_bytes; + const byte* src = src_frame + header_len; + byte* dst = dst_frame + header_len; + + if (!log_tmp_block_encrypt(src, srclen, dst, (offset * srv_page_size), + true)) { + return NULL; + } + + memcpy(dst_frame + srv_page_size - FIL_PAGE_DATA_END, + src_frame + srv_page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + /* Handle post encryption checksum */ + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, + buf_calc_page_crc32(dst_frame)); + + srv_stats.pages_encrypted.inc(); + srv_stats.n_temp_blocks_encrypted.inc(); + return dst_frame; +} + /** Encryption and page_compression hook that is called just before a page is written to disk. 
@param[in,out] space tablespace @@ -7359,13 +7471,21 @@ buf_page_encrypt_before_write( fil_space_crypt_t* crypt_data = space->crypt_data; - const bool encrypted = crypt_data - && !crypt_data->not_encrypted() - && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED - && (!crypt_data->is_default_encryption() - || srv_encrypt_tables); + bool encrypted, page_compressed; - bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); + if (space->purpose == FIL_TYPE_TEMPORARY) { + ut_ad(!crypt_data); + encrypted = innodb_encrypt_temporary_tables; + page_compressed = false; + } else { + encrypted = crypt_data + && !crypt_data->not_encrypted() + && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED + && (!crypt_data->is_default_encryption() + || srv_encrypt_tables); + + page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); + } if (!encrypted && !page_compressed) { /* No need to encrypt or page compress the page. @@ -7386,18 +7506,25 @@ buf_page_encrypt_before_write( if (!page_compressed) { not_compressed: - /* Encrypt page content */ - byte* tmp = fil_space_encrypt(space, - bpage->id.page_no(), - bpage->newest_modification, - src_frame, - dst_frame); + byte* tmp; + if (space->purpose == FIL_TYPE_TEMPORARY) { + /* Encrypt temporary tablespace page content */ + tmp = buf_tmp_page_encrypt(bpage->id.page_no(), + src_frame, dst_frame); + } else { + /* Encrypt page content */ + tmp = fil_space_encrypt( + space, bpage->id.page_no(), + bpage->newest_modification, + src_frame, dst_frame); + } bpage->real_size = srv_page_size; slot->out_buf = dst_frame = tmp; ut_d(fil_page_type_validate(tmp)); } else { + ut_ad(space->purpose != FIL_TYPE_TEMPORARY); /* First we compress the page content */ buf_tmp_reserve_compression_buf(slot); byte* tmp = slot->comp_buf; diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 8edc4768383..2374426ca98 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -335,20 +335,6 @@ 
too_small: goto start_again; } -/** Check if a page is all zeroes. -@param[in] read_buf database page -@param[in] page_size page frame size -@return whether the page is all zeroes */ -static bool buf_page_is_zeroes(const byte* read_buf, size_t page_size) -{ - for (ulint i = 0; i < page_size; i++) { - if (read_buf[i] != 0) { - return false; - } - } - return true; -} - /** At database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 8eb4b0b12a6..e4354caee69 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -2401,7 +2401,8 @@ fil_space_crypt_close_tablespace( { fil_space_crypt_t* crypt_data = space->crypt_data; - if (!crypt_data || srv_n_fil_crypt_threads == 0) { + if (!crypt_data || srv_n_fil_crypt_threads == 0 + || !fil_crypt_threads_inited) { return; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 9fb9e1a4138..16ca2cb17cc 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -4372,6 +4372,11 @@ fil_io( req_type.set_fil_node(node); + ut_ad(!req_type.is_write() + || page_id.space() == SRV_LOG_SPACE_FIRST_ID + || !fil_is_user_tablespace_id(page_id.space()) + || offset == page_id.page_no() * page_size.physical()); + /* Queue the aio request */ dberr_t err = os_aio( req_type, diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4036d20dce4..a2217ae8725 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1170,6 +1170,12 @@ static SHOW_VAR innodb_status_variables[]= { {"encryption_n_rowlog_blocks_decrypted", (char*)&export_vars.innodb_n_rowlog_blocks_decrypted, SHOW_LONGLONG}, + {"encryption_n_temp_blocks_encrypted", + (char*)&export_vars.innodb_n_temp_blocks_encrypted, + SHOW_LONGLONG}, + 
{"encryption_n_temp_blocks_decrypted", + (char*)&export_vars.innodb_n_temp_blocks_decrypted, + SHOW_LONGLONG}, /* scrubing */ {"scrub_background_page_reorganizations", @@ -3802,7 +3808,8 @@ static int innodb_init_params() } #endif - if ((srv_encrypt_tables || srv_encrypt_log) + if ((srv_encrypt_tables || srv_encrypt_log + || innodb_encrypt_temporary_tables) && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { sql_print_error("InnoDB: cannot enable encryption, " "encryption plugin is not available"); @@ -6178,7 +6185,7 @@ no_such_table: if (!thd_tablespace_op(thd)) { set_my_errno(ENOENT); - int ret_err = HA_ERR_NO_SUCH_TABLE; + int ret_err = HA_ERR_TABLESPACE_MISSING; if (space && space->crypt_data && space->crypt_data->is_encrypted()) { @@ -9371,7 +9378,7 @@ ha_innobase::index_read( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -9382,8 +9389,7 @@ ha_innobase::index_read( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - //error = HA_ERR_TABLESPACE_MISSING; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; default: @@ -9524,15 +9530,16 @@ ha_innobase::change_active_index( /* Initialization of search_tuple is not needed for FT index since FT search returns rank only. In addition engine should be able to retrieve FTS_DOC_ID column value if necessary. 
*/ - if ((m_prebuilt->index->type & DICT_FTS)) { -#ifdef MYSQL_STORE_FTS_DOC_ID - if (table->fts_doc_id_field - && bitmap_is_set(table->read_set, - table->fts_doc_id_field->field_index - && m_prebuilt->read_just_key)) { - m_prebuilt->fts_doc_id_in_read_set = 1; + if (m_prebuilt->index->type & DICT_FTS) { + for (ulint i = 0; i < table->s->fields; i++) { + if (m_prebuilt->read_just_key + && bitmap_get_next_set(table->read_set, i) + && !strcmp(table->s->field[i]->field_name.str, + FTS_DOC_ID_COL_NAME)) { + m_prebuilt->fts_doc_id_in_read_set = true; + break; + } } -#endif } else { dtuple_set_n_fields(m_prebuilt->search_tuple, m_prebuilt->index->n_fields); @@ -9543,13 +9550,12 @@ ha_innobase::change_active_index( /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is always added to read_set. */ - -#ifdef MYSQL_STORE_FTS_DOC_ID - m_prebuilt->fts_doc_id_in_read_set = - (m_prebuilt->read_just_key && table->fts_doc_id_field - && m_prebuilt->in_fts_query); -#endif - + m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query + && m_prebuilt->read_just_key + && dict_index_contains_col_or_prefix( + m_prebuilt->index, + m_prebuilt->table->fts->doc_col, + false); } /* MySQL changes the active index for a handle also during some @@ -9628,7 +9634,7 @@ ha_innobase::general_fetch( table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -9937,7 +9943,7 @@ ha_innobase::ft_init_ext( /* If tablespace is discarded, we should return here */ if (!ft_table->space) { - my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str, + my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str, table->s->table_name.str); return(NULL); } @@ -10154,7 +10160,7 @@ next_record: table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; case DB_TABLESPACE_NOT_FOUND: @@ -11112,6 +11118,17 @@ err_col: 
dict_table_add_system_columns(table, heap); if (table->is_temporary()) { + if ((options->encryption == 1 + && !innodb_encrypt_temporary_tables) + || (options->encryption == 2 + && innodb_encrypt_temporary_tables)) { + push_warning_printf(m_thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "Ignoring encryption parameter during " + "temporary table creation."); + } + /* Get a new table ID. FIXME: Make this a private sequence, not shared with persistent tables! */ dict_table_assign_new_id(table, m_trx); @@ -15581,7 +15598,7 @@ ha_innobase::external_lock( ER_TABLESPACE_DISCARDED, table->s->table_name.str); - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + DBUG_RETURN(HA_ERR_TABLESPACE_MISSING); } row_quiesce_table_start(m_prebuilt->table, trx); @@ -20092,6 +20109,11 @@ static MYSQL_SYSVAR_BOOL(debug_force_scrubbing, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ +static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enrypt the temporary table data.", + NULL, NULL, false); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), @@ -20298,6 +20320,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #endif MYSQL_SYSVAR(buf_dump_status_frequency), MYSQL_SYSVAR(background_thread), + MYSQL_SYSVAR(encrypt_temporary_tables), NULL }; diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index c3065d2f0d5..492c4b45ef7 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4816,20 +4816,6 @@ ibuf_print( mutex_exit(&ibuf_mutex); } -/** Check if a page is all zeroes. 
-@param[in] read_buf database page -@param[in] size page size -@return whether the page is all zeroes */ -static bool buf_page_is_zeroes(const byte* read_buf, const page_size_t& size) -{ - for (ulint i = 0; i < size.physical(); i++) { - if (read_buf[i] != 0) { - return false; - } - } - return true; -} - /** Check the insert buffer bitmaps on IMPORT TABLESPACE. @param[in] trx transaction @param[in,out] space tablespace being imported @@ -4890,7 +4876,7 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) bitmap_page = ibuf_bitmap_get_map_page( page_id_t(space->id, page_no), page_size, &mtr); - if (buf_page_is_zeroes(bitmap_page, page_size)) { + if (buf_page_is_zeroes(bitmap_page, page_size.physical())) { /* This means we got all-zero page instead of ibuf bitmap page. The subsequent page should be all-zero pages. */ @@ -4902,7 +4888,8 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) page_id_t(space->id, curr_page), page_size, RW_S_LATCH, &mtr); page_t* page = buf_block_get_frame(block); - ut_ad(buf_page_is_zeroes(page, page_size)); + ut_ad(buf_page_is_zeroes( + page, page_size.physical())); } #endif /* UNIV_DEBUG */ ibuf_exit(&mtr); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index faf79ac34dc..cbef5475af5 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -646,6 +646,12 @@ buf_block_unfix(buf_block_t* block); # endif /* UNIV_DEBUG */ #endif /* !UNIV_INNOCHECKSUM */ +/** Check if a page is all zeroes. +@param[in] read_buf database page +@param[in] page_size page frame size +@return whether the page is all zeroes */ +bool buf_page_is_zeroes(const void* read_buf, size_t page_size); + /** Checks if the page is in crc32 checksum format. 
@param[in] read_buf database page @param[in] checksum_field1 new checksum field diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h index 19c1493924d..7a14b022e66 100644 --- a/storage/innobase/include/log0crypt.h +++ b/storage/innobase/include/log0crypt.h @@ -87,7 +87,6 @@ log_crypt(byte* buf, lsn_t lsn, ulint size, bool decrypt = false); @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @param[in] encrypt true=encrypt; false=decrypt @return whether the operation succeeded */ UNIV_INTERN @@ -97,7 +96,6 @@ log_tmp_block_encrypt( ulint size, byte* dst, uint64_t offs, - ulint space_id, bool encrypt = true) MY_ATTRIBUTE((warn_unused_result, nonnull)); @@ -106,7 +104,6 @@ log_tmp_block_encrypt( @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @return whether the operation succeeded */ inline bool @@ -114,10 +111,9 @@ log_tmp_block_decrypt( const byte* src, ulint size, byte* dst, - uint64_t offs, - ulint space_id) + uint64_t offs) { - return(log_tmp_block_encrypt(src, size, dst, offs, space_id, false)); + return(log_tmp_block_encrypt(src, size, dst, offs, false)); } /** @return whether temporary files are encrypted */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 070a8cced0d..61b3e6fea3f 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -187,6 +187,12 @@ struct srv_stats_t /** Number of spaces in keyrotation list */ ulint_ctr_64_t key_rotation_list_length; + + /** Number of temporary tablespace blocks encrypted */ + ulint_ctr_64_t n_temp_blocks_encrypted; + + /** Number of temporary tablespace blocks decrypted */ + ulint_ctr_64_t n_temp_blocks_decrypted; }; extern const char* srv_main_thread_op_info; @@ -475,6 +481,9 @@ extern ulong srv_max_purge_lag; extern ulong 
srv_max_purge_lag_delay; extern ulong srv_replication_delay; + +extern my_bool innodb_encrypt_temporary_tables; + /*-------------------------------------------*/ /** Modes of operation */ @@ -1042,6 +1051,12 @@ struct export_var_t{ /*!< Number of row log blocks decrypted */ ib_int64_t innodb_n_rowlog_blocks_decrypted; + /* Number of temporary tablespace pages encrypted */ + ib_int64_t innodb_n_temp_blocks_encrypted; + + /* Number of temporary tablespace pages decrypted */ + ib_int64_t innodb_n_temp_blocks_decrypted; + ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ ulint innodb_sec_rec_cluster_reads_avoided;/*!< srv_sec_rec_cluster_reads_avoided */ diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc index d6497d26e14..a2e4ac1dd40 100644 --- a/storage/innobase/log/log0crypt.cc +++ b/storage/innobase/log/log0crypt.cc @@ -61,6 +61,9 @@ struct crypt_info_t { /** The crypt info */ static crypt_info_t info; +/** Initialization vector used for temporary files/tablespace */ +static byte tmp_iv[MY_AES_BLOCK_SIZE]; + /** Crypt info when upgrading from 10.1 */ static crypt_info_t infos[5 * 2]; /** First unused slot in infos[] */ @@ -196,9 +199,6 @@ UNIV_INTERN bool log_crypt_init() { - ut_ad(log_mutex_own()); - ut_ad(log_sys.is_encrypted()); - info.key_version = encryption_key_get_latest_version( LOG_DEFAULT_ENCRYPTION_KEY); @@ -208,7 +208,8 @@ log_crypt_init() return false; } - if (my_random_bytes(info.crypt_msg.bytes, sizeof info.crypt_msg) + if (my_random_bytes(tmp_iv, MY_AES_BLOCK_SIZE) != MY_AES_OK + || my_random_bytes(info.crypt_msg.bytes, sizeof info.crypt_msg) != MY_AES_OK || my_random_bytes(info.crypt_nonce.bytes, sizeof info.crypt_nonce) != MY_AES_OK) { @@ -373,7 +374,6 @@ log_crypt_read_checkpoint_buf(const byte* buf) @param[in] size size of the block @param[out] dst destination block @param[in] offs offset to block -@param[in] space_id tablespace id @param[in] encrypt true=encrypt; false=decrypt @return whether 
the operation succeeded */ UNIV_INTERN @@ -383,19 +383,18 @@ log_tmp_block_encrypt( ulint size, byte* dst, uint64_t offs, - ulint space_id, bool encrypt) { uint dst_len; - uint64_t aes_ctr_iv[MY_AES_BLOCK_SIZE / sizeof(uint64_t)]; - bzero(aes_ctr_iv, sizeof aes_ctr_iv); - aes_ctr_iv[0] = space_id; - aes_ctr_iv[1] = offs; + uint64_t iv[MY_AES_BLOCK_SIZE / sizeof(uint64_t)]; + iv[0] = offs; + memcpy(iv + 1, tmp_iv, sizeof iv - sizeof *iv); int rc = encryption_crypt( - src, (uint)size, dst, &dst_len, - const_cast(info.crypt_key.bytes), (uint)(sizeof info.crypt_key), - reinterpret_cast(aes_ctr_iv), (uint)(sizeof aes_ctr_iv), + src, uint(size), dst, &dst_len, + const_cast(info.crypt_key.bytes), + uint(sizeof info.crypt_key), + reinterpret_cast(iv), uint(sizeof iv), encrypt ? ENCRYPTION_FLAG_ENCRYPT|ENCRYPTION_FLAG_NOPAD : ENCRYPTION_FLAG_DECRYPT|ENCRYPTION_FLAG_NOPAD, diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 21f041a9bf5..78f283a3fc8 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -434,8 +434,7 @@ row_log_online_op( if (log_tmp_is_encrypted()) { if (!log_tmp_block_encrypt( buf, srv_sort_buf_size, - log->crypt_tail, byte_offset, - index->table->space_id)) { + log->crypt_tail, byte_offset)) { log->error = DB_DECRYPTION_FAILED; goto write_failed; } @@ -2876,8 +2875,7 @@ all_done: if (log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt( buf, srv_sort_buf_size, - index->online_log->crypt_head, - ofs, index->table->space_id)) { + index->online_log->crypt_head, ofs)) { error = DB_DECRYPTION_FAILED; goto func_exit; } @@ -3779,8 +3777,7 @@ all_done: if (log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt( buf, srv_sort_buf_size, - index->online_log->crypt_head, - ofs, index->table->space_id)) { + index->online_log->crypt_head, ofs)) { error = DB_DECRYPTION_FAILED; goto func_exit; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 42d01be732d..fa85a511ba6 
100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1093,7 +1093,7 @@ row_merge_read( /* If encryption is enabled decrypt buffer */ if (success && log_tmp_is_encrypted()) { if (!log_tmp_block_decrypt(buf, srv_sort_buf_size, - crypt_buf, ofs, space)) { + crypt_buf, ofs)) { return (FALSE); } @@ -1142,7 +1142,7 @@ row_merge_write( if (!log_tmp_block_encrypt(static_cast(buf), buf_len, static_cast(crypt_buf), - ofs, space)) { + ofs)) { return false; } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index dd476970f26..8bbec2e741b 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -430,6 +430,9 @@ my_bool srv_print_innodb_lock_monitor; PRIMARY KEY */ my_bool srv_force_primary_key; +/** Whether to encrypt the temporary tablespace */ +my_bool innodb_encrypt_temporary_tables; + /* Array of English strings describing the current state of an i/o handler thread */ @@ -1598,6 +1601,12 @@ srv_export_innodb_status(void) export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted; export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted; + export_vars.innodb_n_temp_blocks_encrypted = + srv_stats.n_temp_blocks_encrypted; + + export_vars.innodb_n_temp_blocks_decrypted = + srv_stats.n_temp_blocks_decrypted; + export_vars.innodb_defragment_compression_failures = btr_defragment_compression_failures; export_vars.innodb_defragment_failures = btr_defragment_failures; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index aa3557b6aa7..47eaccb6573 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -486,7 +486,7 @@ create_log_files( /* Create a log checkpoint. 
*/ log_mutex_enter(); if (log_sys.is_encrypted() && !log_crypt_init()) { - return(DB_ERROR); + return DB_ERROR; } ut_d(recv_no_log_write = false); log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); @@ -1708,6 +1708,10 @@ dberr_t srv_start(bool create_new_db) srv_log_file_size_requested = srv_log_file_size; + if (innodb_encrypt_temporary_tables && !log_crypt_init()) { + return srv_init_abort(DB_ERROR); + } + if (create_new_db) { buf_flush_sync_all_buf_pools(); diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 6a8d1a559e8..7e71b361d6e 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2708,7 +2708,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) changes to commit (rollback shouldn't be tested). */ DBUG_ASSERT(!thd->get_stmt_da()->is_sent() || - thd->killed == KILL_CONNECTION); + thd->killed); /* autocommit ? rollback a transaction */ #ifdef MARIA_CANNOT_ROLLBACK if (ma_commit(trn)) diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index 7d8f8b8d27c..2697c72f2b8 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -286,7 +286,6 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, We however do a flush here for additional safety. */ /** @todo consider porting these flush-es to MyISAM */ - DBUG_ASSERT(share->reopen == 1); error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE); if (!error && share->changed) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index ea138142316..748f7585c67 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -1,4 +1,5 @@ /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + Copyright (c) 2009, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -249,20 +250,6 @@ err: } /* maria_clone_internal */ -/* Make a clone of a maria table */ - -MARIA_HA *maria_clone(MARIA_SHARE *share, int mode) -{ - MARIA_HA *new_info; - mysql_mutex_lock(&THR_LOCK_maria); - new_info= maria_clone_internal(share, mode, - share->data_file_type == BLOCK_RECORD ? - share->bitmap.file.file : -1, 0); - mysql_mutex_unlock(&THR_LOCK_maria); - return new_info; -} - - /****************************************************************************** open a MARIA table diff --git a/storage/rocksdb/.clang-format b/storage/rocksdb/.clang-format index d80b012dd4b..b1df76bdf2d 100644 --- a/storage/rocksdb/.clang-format +++ b/storage/rocksdb/.clang-format @@ -1,23 +1,49 @@ ---- +# Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2.0, +# as published by the Free Software Foundation. +# +# This program is also distributed with certain software (including +# but not limited to OpenSSL) that is licensed under separate terms, +# as designated in a particular file or component or in included license +# documentation. The authors of MySQL hereby grant you an additional +# permission to link the program and your derivative works with the +# separately licensed software that they have included with MySQL. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License, version 2.0, for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +# This is the output of clang-format-5.0 --style=google --dump-config, +# except for changes mentioned below. We lock the style so that any newer +# version of clang-format will give the same result; as time goes, we may +# update this list, requiring newer versions of clang-format. + Language: Cpp -# BasedOnStyle: LLVM -AccessModifierOffset: -2 +# BasedOnStyle: Google +AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: false +AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: @@ -32,62 +58,80 @@ BraceWrapping: BeforeCatch: false BeforeElse: false IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: 
false +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true -DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false -ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - - Regex: '^(<|"(gtest|isl|json)/)' - Priority: 3 - - Regex: '.*' + - Regex: '^<.*\.h>' Priority: 1 -IncludeIsMainRegex: '$' -IndentCaseLabels: false + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true IndentWidth: 2 IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PenaltyBreakBeforeFirstCallParameter: 19 +ObjCSpaceBeforeProtocolList: false +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 -PointerAlignment: Right +PenaltyReturnTypeOnItsOwnLine: 200 ReflowComments: true SortIncludes: true +SortUsingDeclarations: true SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 +SpacesBeforeTrailingComments: 2 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false -Standard: Cpp11 TabWidth: 8 UseTab: Never -JavaScriptQuotes: 
Leave -... + +# We declare one specific pointer style since right alignment is dominant in +# the MySQL code base (default --style=google has DerivePointerAlignment true). +DerivePointerAlignment: false +PointerAlignment: Right + +# MySQL source code is allowed to use C++11 features. +Standard: Cpp11 diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index 703504a6682..9551ad7f343 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -104,6 +104,8 @@ SET(ROCKSDB_SE_SOURCES rdb_psi.cc rdb_sst_info.cc rdb_sst_info.h + rdb_converter.cc + rdb_converter.h ) # MariaDB: the following is added in build_rocksdb.cmake, when appropriate: @@ -153,6 +155,7 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib rdb_perf_context.h rdb_buff.h rdb_mariadb_port.h + nosql_access.cc nosql_access.h ) ADD_DEPENDENCIES(rocksdb_aux_lib GenError) @@ -163,6 +166,27 @@ if (UNIX AND NOT APPLE) TARGET_LINK_LIBRARIES(rocksdb_aux_lib -lrt) endif() +# IF (WITH_JEMALLOC) +# FIND_LIBRARY(JEMALLOC_LIBRARY +# NAMES libjemalloc${PIC_EXT}.a jemalloc +# HINTS ${WITH_JEMALLOC}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${JEMALLOC_LIBRARY}) +# ADD_DEFINITIONS(-DROCKSDB_JEMALLOC) +# ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) +# ENDIF() + +# MariaDB: Q: why does the upstream add libunwind for a particular +# storage engine? 
+#IF (WITH_UNWIND) +# FIND_LIBRARY(UNWIND_LIBRARY +# NAMES libunwind${PIC_EXT}.a unwind +# HINTS ${WITH_UNWIND}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${UNWIND_LIBRARY}) +#ENDIF() + + TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib) FIND_LIBRARY(LZ4_LIBRARY NAMES liblz4${PIC_EXT}.a lz4 @@ -191,6 +215,8 @@ ENDIF() CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) IF(HAVE_SCHED_GETCPU) ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1) +# MariaDB: don't do this: +# ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY) ENDIF() IF (WITH_TBB) diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index 0b93ea3c1d4..561e6cdefe6 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -21,11 +21,13 @@ else() if(WITH_ROCKSDB_JEMALLOC) find_package(JeMalloc REQUIRED) add_definitions(-DROCKSDB_JEMALLOC) + ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) include_directories(${JEMALLOC_INCLUDE_DIR}) endif() if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") # FreeBSD has jemaloc as default malloc add_definitions(-DROCKSDB_JEMALLOC) + ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) set(WITH_JEMALLOC ON) endif() endif() @@ -160,7 +162,7 @@ find_package(Threads REQUIRED) if(WIN32) set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib) else() - set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT}) + set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT} ${LIBDL}) endif() set(ROCKSDB_LIBS rocksdblib}) @@ -178,24 +180,27 @@ set(ROCKSDB_SOURCES db/c.cc db/column_family.cc db/compacted_db_impl.cc - db/compaction.cc - db/compaction_iterator.cc - db/compaction_job.cc - db/compaction_picker.cc - db/compaction_picker_universal.cc + db/compaction/compaction.cc + db/compaction/compaction_iterator.cc + db/compaction/compaction_job.cc + db/compaction/compaction_picker.cc + db/compaction/compaction_picker_fifo.cc + db/compaction/compaction_picker_level.cc + db/compaction/compaction_picker_universal.cc db/convenience.cc db/db_filesnapshot.cc - 
db/db_impl.cc - db/db_impl_compaction_flush.cc - db/db_impl_debug.cc - db/db_impl_experimental.cc - db/db_impl_files.cc - db/db_impl_open.cc - db/db_impl_readonly.cc - db/db_impl_write.cc + db/dbformat.cc + db/db_impl/db_impl.cc + db/db_impl/db_impl_compaction_flush.cc + db/db_impl/db_impl_debug.cc + db/db_impl/db_impl_experimental.cc + db/db_impl/db_impl_files.cc + db/db_impl/db_impl_open.cc + db/db_impl/db_impl_readonly.cc + db/db_impl/db_impl_secondary.cc + db/db_impl/db_impl_write.cc db/db_info_dumper.cc db/db_iter.cc - db/dbformat.cc db/error_handler.cc db/event_helpers.cc db/experimental.cc @@ -204,17 +209,18 @@ set(ROCKSDB_SOURCES db/flush_job.cc db/flush_scheduler.cc db/forward_iterator.cc + db/in_memory_stats_history.cc db/internal_stats.cc db/log_reader.cc - db/log_writer.cc db/logs_with_prep_tracker.cc + db/log_writer.cc db/malloc_stats.cc - db/managed_iterator.cc db/memtable.cc db/memtable_list.cc db/merge_helper.cc db/merge_operator.cc db/range_del_aggregator.cc + db/range_tombstone_fragmenter.cc db/repair.cc db/snapshot_impl.cc db/table_cache.cc @@ -224,24 +230,29 @@ set(ROCKSDB_SOURCES db/version_edit.cc db/version_set.cc db/wal_manager.cc - db/write_batch.cc db/write_batch_base.cc + db/write_batch.cc db/write_controller.cc db/write_thread.cc env/env.cc env/env_chroot.cc env/env_hdfs.cc env/mock_env.cc + file/delete_scheduler.cc + file/filename.cc + file/file_util.cc + file/sst_file_manager_impl.cc + logging/auto_roll_logger.cc + logging/event_logger.cc + logging/log_buffer.cc + memory/arena.cc + memory/concurrent_arena.cc + memory/jemalloc_nodump_allocator.cc memtable/alloc_tracker.cc - memtable/hash_cuckoo_rep.cc - memtable/hash_cuckoo_rep.cc - memtable/hash_linklist_rep.cc memtable/hash_linklist_rep.cc memtable/hash_skiplist_rep.cc - memtable/hash_skiplist_rep.cc + memtable/memtablerep_bench.cc memtable/skiplistrep.cc - memtable/skiplistrep.cc - memtable/vectorrep.cc memtable/vectorrep.cc memtable/write_buffer_manager.cc monitoring/histogram.cc @@ 
-253,6 +264,7 @@ set(ROCKSDB_SOURCES monitoring/statistics.cc monitoring/thread_status_impl.cc monitoring/thread_status_updater.cc + monitoring/thread_status_updater_debug.cc monitoring/thread_status_util.cc monitoring/thread_status_util_debug.cc options/cf_options.cc @@ -262,108 +274,101 @@ set(ROCKSDB_SOURCES options/options_parser.cc options/options_sanity_check.cc port/stack_trace.cc - table/adaptive_table_factory.cc - table/block.cc - table/block_based_filter_block.cc - table/block_based_table_builder.cc - table/block_based_table_factory.cc - table/block_based_table_reader.cc - table/block_builder.cc + table/adaptive/adaptive_table_factory.cc + table/block_based/block_based_filter_block.cc + table/block_based/block_based_table_builder.cc + table/block_based/block_based_table_factory.cc + table/block_based/block_based_table_reader.cc + table/block_based/block_builder.cc + table/block_based/block.cc + table/block_based/block_prefix_index.cc + table/block_based/data_block_footer.cc + table/block_based/data_block_hash_index.cc + table/block_based/flush_block_policy.cc + table/block_based/full_filter_block.cc + table/block_based/index_builder.cc + table/block_based/partitioned_filter_block.cc table/block_fetcher.cc - table/block_prefix_index.cc table/bloom_block.cc - table/cuckoo_table_builder.cc - table/cuckoo_table_factory.cc - table/cuckoo_table_reader.cc - table/flush_block_policy.cc + table/cuckoo/cuckoo_table_builder.cc + table/cuckoo/cuckoo_table_factory.cc + table/cuckoo/cuckoo_table_reader.cc table/format.cc - table/full_filter_block.cc table/get_context.cc - table/index_builder.cc table/iterator.cc table/merging_iterator.cc table/meta_blocks.cc - table/partitioned_filter_block.cc + table/mock_table.cc table/persistent_cache_helper.cc - table/plain_table_builder.cc - table/plain_table_factory.cc - table/plain_table_index.cc - table/plain_table_key_coding.cc - table/plain_table_reader.cc + table/plain/plain_table_builder.cc + 
table/plain/plain_table_factory.cc + table/plain/plain_table_index.cc + table/plain/plain_table_key_coding.cc + table/plain/plain_table_reader.cc + table/sst_file_reader.cc table/sst_file_writer.cc table/table_properties.cc + table/table_reader_bench.cc table/two_level_iterator.cc - tools/db_bench_tool.cc - tools/dump/db_dump_tool.cc + test_util/sync_point.cc + test_util/sync_point_impl.cc tools/ldb_cmd.cc tools/ldb_tool.cc tools/sst_dump_tool.cc - util/arena.cc - util/auto_roll_logger.cc + trace_replay/trace_replay.cc util/bloom.cc util/coding.cc util/compaction_job_stats_impl.cc util/comparator.cc util/compression_context_cache.cc - util/concurrent_arena.cc + util/concurrent_task_limiter_impl.cc + util/crc32c_arm64.cc util/crc32c.cc - util/delete_scheduler.cc util/dynamic_bloom.cc - util/event_logger.cc util/file_reader_writer.cc - util/file_util.cc - util/filename.cc util/filter_policy.cc util/hash.cc - util/log_buffer.cc - util/murmurhash.cc - util/random.cc - util/rate_limiter.cc - util/slice.cc - util/sst_file_manager_impl.cc - util/status.cc - util/status_message.cc - util/string_util.cc - util/sync_point.cc - util/sync_point_impl.cc - util/testutil.cc - util/thread_local.cc - util/threadpool_imp.cc - util/transaction_test_util.cc - util/xxhash.cc utilities/backupable/backupable_db.cc + utilities/blob_db/blob_compaction_filter.cc utilities/blob_db/blob_db.cc + utilities/blob_db/blob_db_impl.cc + utilities/blob_db/blob_db_impl_filesnapshot.cc + utilities/blob_db/blob_dump_tool.cc + utilities/blob_db/blob_file.cc + utilities/blob_db/blob_log_format.cc + utilities/blob_db/blob_log_reader.cc + utilities/blob_db/blob_log_writer.cc + utilities/cassandra/cassandra_compaction_filter.cc + utilities/cassandra/format.cc + utilities/cassandra/merge_operator.cc + utilities/cassandra/test_utils.cc utilities/checkpoint/checkpoint_impl.cc - utilities/col_buf_decoder.cc - utilities/col_buf_encoder.cc - utilities/column_aware_encoding_util.cc 
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc - utilities/date_tiered/date_tiered_db_impl.cc + utilities/convenience/info_log_finder.cc utilities/debug.cc - utilities/document/document_db.cc - utilities/document/json_document.cc - utilities/document/json_document_builder.cc utilities/env_mirror.cc - utilities/geodb/geodb_impl.cc + utilities/env_timed.cc utilities/leveldb_options/leveldb_options.cc - utilities/lua/rocks_lua_compaction_filter.cc utilities/memory/memory_util.cc + utilities/merge_operators/bytesxor.cc utilities/merge_operators/max.cc utilities/merge_operators/put.cc - utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/string_append/stringappend2.cc + utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/uint64add.cc utilities/option_change_migration/option_change_migration.cc utilities/options/options_util.cc utilities/persistent_cache/block_cache_tier.cc utilities/persistent_cache/block_cache_tier_file.cc utilities/persistent_cache/block_cache_tier_metadata.cc + utilities/persistent_cache/hash_table_bench.cc + utilities/persistent_cache/persistent_cache_bench.cc utilities/persistent_cache/persistent_cache_tier.cc utilities/persistent_cache/volatile_tier_impl.cc - utilities/redis/redis_lists.cc utilities/simulator_cache/sim_cache.cc - utilities/spatialdb/spatial_db.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc + utilities/trace/file_trace_reader_writer.cc + utilities/transactions/optimistic_transaction.cc utilities/transactions/optimistic_transaction_db_impl.cc utilities/transactions/pessimistic_transaction.cc utilities/transactions/pessimistic_transaction_db.cc @@ -379,8 +384,19 @@ set(ROCKSDB_SOURCES utilities/ttl/db_ttl_impl.cc utilities/write_batch_with_index/write_batch_with_index.cc utilities/write_batch_with_index/write_batch_with_index_internal.cc + util/log_write_bench.cc + util/murmurhash.cc + util/random.cc + util/rate_limiter.cc + 
util/slice.cc + util/status.cc + util/string_util.cc + util/thread_local.cc + util/threadpool_imp.cc + util/xxhash.cc ) + if(WIN32) list(APPEND ROCKSDB_SOURCES port/win/io_win.cc diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc index 941f00cac60..4995e407e22 100644 --- a/storage/rocksdb/event_listener.cc +++ b/storage/rocksdb/event_listener.cc @@ -33,9 +33,9 @@ namespace myrocks { -static std::vector -extract_index_stats(const std::vector &files, - const rocksdb::TablePropertiesCollection &props) { +static std::vector extract_index_stats( + const std::vector &files, + const rocksdb::TablePropertiesCollection &props) { std::vector ret; for (auto fn : files) { const auto it = props.find(fn); @@ -86,10 +86,11 @@ void Rdb_event_listener::OnExternalFileIngested( void Rdb_event_listener::OnBackgroundError( rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) { rdb_log_status_error(*status, "Error detected in background"); + // NO_LINT_DEBUG sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason); if (status->IsCorruption()) { rdb_persist_corruption_marker(); abort(); } } -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h index f666c876eac..737973eb9da 100644 --- a/storage/rocksdb/event_listener.h +++ b/storage/rocksdb/event_listener.h @@ -22,7 +22,7 @@ namespace myrocks { class Rdb_ddl_manager; class Rdb_event_listener : public rocksdb::EventListener { -public: + public: Rdb_event_listener(const Rdb_event_listener &) = delete; Rdb_event_listener &operator=(const Rdb_event_listener &) = delete; @@ -40,10 +40,10 @@ public: void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) override; -private: + private: Rdb_ddl_manager *m_ddl_manager; void update_index_stats(const rocksdb::TableProperties &props); }; -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb.cc 
b/storage/rocksdb/ha_rocksdb.cc index 2837d31d37a..0162b774ad6 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -15,7 +15,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ #ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation +#pragma implementation // gcc: Class implementation #endif #define MYSQL_SERVER 1 @@ -31,8 +31,8 @@ #include "./ha_rocksdb.h" /* C++ standard header files */ -#include #include +#include #include #include #include @@ -41,6 +41,9 @@ #include /* MySQL includes */ +#include +// #include +#include #include "./debug_sync.h" #include "./my_bit.h" #include "./my_stacktrace.h" @@ -48,20 +51,14 @@ #include "./sql_audit.h" #include "./sql_table.h" #include "./sql_hset.h" -#include #ifdef MARIAROCKS_NOT_YET -#include #endif -#include - -// Both MySQL and RocksDB define the same constant. To avoid compilation errors -// till we make the fix in RocksDB, we'll temporary undefine it here. 
-#undef CACHE_LINE_SIZE /* RocksDB includes */ #include "monitoring/histogram.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" +#include "rocksdb/memory_allocator.h" #include "rocksdb/persistent_cache.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" @@ -70,6 +67,7 @@ #include "rocksdb/utilities/convenience.h" #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/sim_cache.h" +#include "rocksdb/utilities/write_batch_with_index.h" #include "util/stop_watch.h" #include "./rdb_source_revision.h" @@ -77,8 +75,10 @@ #include "./event_listener.h" #include "./ha_rocksdb_proto.h" #include "./logger.h" +#include "./nosql_access.h" #include "./rdb_cf_manager.h" #include "./rdb_cf_options.h" +#include "./rdb_converter.h" #include "./rdb_datadic.h" #include "./rdb_i_s.h" #include "./rdb_index_merge.h" @@ -100,18 +100,19 @@ void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); * Get the user thread's binary logging format * @param thd user thread * @return Value to be used as index into the binlog_format_names array -*/ + */ int thd_binlog_format(const MYSQL_THD thd); /** * Check if binary logging is filtered for thread's current db. * @param thd Thread handle * @retval 1 the query is not filtered, 0 otherwise. 
-*/ + */ bool thd_binlog_filter_ok(const MYSQL_THD thd); } MYSQL_PLUGIN_IMPORT bool my_disable_leak_check; +extern my_bool opt_core_file; // Needed in rocksdb_init_func void ignore_db_dirs_append(const char *dirname_arg); @@ -128,22 +129,14 @@ const std::string DEFAULT_CF_NAME("default"); const std::string DEFAULT_SYSTEM_CF_NAME("__system__"); const std::string PER_INDEX_CF_NAME("$per_index_cf"); -class Rdb_explicit_snapshot; - -std::mutex explicit_snapshot_mutex; -ulonglong explicit_snapshot_counter = 0; -std::unordered_map> - explicit_snapshots; static std::vector rdb_indexes_to_recalc; #ifdef MARIADB_NOT_YET class Rdb_explicit_snapshot : public explicit_snapshot { - std::unique_ptr snapshot; - public: - static std::shared_ptr - create(snapshot_info_st *ss_info, rocksdb::DB *db, - const rocksdb::Snapshot *snapshot) { + static std::shared_ptr create( + snapshot_info_st *ss_info, rocksdb::DB *db, + const rocksdb::Snapshot *snapshot) { std::lock_guard lock(explicit_snapshot_mutex); auto s = std::unique_ptr( new rocksdb::ManagedSnapshot(db, snapshot)); @@ -159,8 +152,24 @@ class Rdb_explicit_snapshot : public explicit_snapshot { return ret; } - static std::shared_ptr - get(const ulonglong snapshot_id) { + static std::string dump_snapshots() { + std::string str; + std::lock_guard lock(explicit_snapshot_mutex); + for (const auto &elem : explicit_snapshots) { + const auto &ss = elem.second.lock(); + DBUG_ASSERT(ss != nullptr); + const auto &info = ss->ss_info; + str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) + + "\nBinlog File: " + info.binlog_file + + "\nBinlog Pos: " + std::to_string(info.binlog_pos) + + "\nGtid Executed: " + info.gtid_executed + "\n"; + } + + return str; + } + + static std::shared_ptr get( + const ulonglong snapshot_id) { std::lock_guard lock(explicit_snapshot_mutex); auto elem = explicit_snapshots.find(snapshot_id); if (elem == explicit_snapshots.end()) { @@ -172,14 +181,27 @@ class Rdb_explicit_snapshot : public explicit_snapshot { 
rocksdb::ManagedSnapshot *get_snapshot() { return snapshot.get(); } Rdb_explicit_snapshot(snapshot_info_st ss_info, - std::unique_ptr snapshot) + std::unique_ptr &&snapshot) : explicit_snapshot(ss_info), snapshot(std::move(snapshot)) {} virtual ~Rdb_explicit_snapshot() { std::lock_guard lock(explicit_snapshot_mutex); explicit_snapshots.erase(ss_info.snapshot_id); } + + private: + std::unique_ptr snapshot; + + static std::mutex explicit_snapshot_mutex; + static ulonglong explicit_snapshot_counter; + static std::unordered_map> + explicit_snapshots; }; + +std::mutex Rdb_explicit_snapshot::explicit_snapshot_mutex; +ulonglong Rdb_explicit_snapshot::explicit_snapshot_counter = 0; +std::unordered_map> + Rdb_explicit_snapshot::explicit_snapshots; #endif /** @@ -188,10 +210,11 @@ class Rdb_explicit_snapshot : public explicit_snapshot { void ha_rocksdb::update_row_stats(const operation_type &type) { DBUG_ASSERT(type < ROWS_MAX); // Find if we are modifying system databases. - if (table->s && m_tbl_def->m_is_mysql_system_table) + if (table->s && m_tbl_def->m_is_mysql_system_table) { global_stats.system_rows[type].inc(); - else + } else { global_stats.rows[type].inc(); + } } void dbug_dump_database(rocksdb::DB *db); @@ -199,8 +222,8 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton, my_core::TABLE_SHARE *table_arg, my_core::MEM_ROOT *mem_root); -static rocksdb::CompactRangeOptions -getCompactRangeOptions(int concurrency = 0) { +static rocksdb::CompactRangeOptions getCompactRangeOptions( + int concurrency = 0) { rocksdb::CompactRangeOptions compact_range_options; compact_range_options.bottommost_level_compaction = rocksdb::BottommostLevelCompaction::kForce; @@ -261,37 +284,76 @@ static void rocksdb_flush_all_memtables() { } } +static void rocksdb_delete_column_family_stub( + THD *const /* thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, const void *const /* save */) {} + +static int rocksdb_delete_column_family( + THD *const /* 
thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, struct st_mysql_value *const value) { + // Return failure for now until the race condition between creating + // CF and deleting CF is resolved + return HA_EXIT_FAILURE; + + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + + DBUG_ASSERT(value != nullptr); + + if (const char *const cf = value->val_str(value, buff, &len)) { + auto &cf_manager = rdb_get_cf_manager(); + auto ret = cf_manager.drop_cf(cf); + if (ret == HA_EXIT_SUCCESS) { + // NO_LINT_DEBUG + sql_print_information("RocksDB: Dropped column family: %s\n", cf); + } else { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Failed to drop column family: %s, error: %d\n", + cf, ret); + } + + return ret; + } + + return HA_EXIT_SUCCESS; +} + /////////////////////////////////////////////////////////// // Hash map: table name => open table handler /////////////////////////////////////////////////////////// -namespace // anonymous namespace = not visible outside this source file +namespace // anonymous namespace = not visible outside this source file { -const ulong TABLE_HASH_SIZE = 32; typedef Hash_set Rdb_table_set; -struct Rdb_open_tables_map { +class Rdb_open_tables_map { + private: /* Hash table used to track the handlers of open tables */ - Rdb_table_set m_hash; + std::unordered_map m_table_map; + /* The mutex used to protect the hash table */ mutable mysql_mutex_t m_mutex; - static uchar *get_hash_key(const Rdb_table_handler *const table_handler, - size_t *const length, - my_bool not_used MY_ATTRIBUTE((__unused__))); + public: + void init() { + m_table_map.clear(); + mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST); + } + + void free() { + m_table_map.clear(); + mysql_mutex_destroy(&m_mutex); + } + size_t count() { return m_table_map.size(); } Rdb_table_handler *get_table_handler(const char *const table_name); void release_table_handler(Rdb_table_handler *const table_handler); - 
Rdb_open_tables_map() : m_hash(get_hash_key, system_charset_info) { } - - void free_hash(void) { m_hash.~Rdb_table_set(); } - std::vector get_table_names(void) const; }; -} // anonymous namespace +} // anonymous namespace static Rdb_open_tables_map rdb_open_tables; @@ -326,6 +388,7 @@ static int rocksdb_create_checkpoint( status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str()); delete checkpoint; if (status.ok()) { + // NO_LINT_DEBUG sql_print_information( "RocksDB: created checkpoint in directory : %s\n", checkpoint_dir.c_str()); @@ -355,6 +418,7 @@ static void rocksdb_force_flush_memtable_now_stub( static int rocksdb_force_flush_memtable_now( THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, struct st_mysql_value *const value) { + // NO_LINT_DEBUG sql_print_information("RocksDB: Manual memtable flush."); rocksdb_flush_all_memtables(); return HA_EXIT_SUCCESS; @@ -367,6 +431,7 @@ static void rocksdb_force_flush_memtable_and_lzero_now_stub( static int rocksdb_force_flush_memtable_and_lzero_now( THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, struct st_mysql_value *const value) { + // NO_LINT_DEBUG sql_print_information("RocksDB: Manual memtable and L0 flush."); rocksdb_flush_all_memtables(); @@ -375,29 +440,46 @@ static int rocksdb_force_flush_memtable_and_lzero_now( rocksdb::ColumnFamilyMetaData metadata; rocksdb::ColumnFamilyDescriptor cf_descr; + int i, max_attempts = 3, num_errors = 0; + for (const auto &cf_handle : cf_manager.get_all_cf()) { - rdb->GetColumnFamilyMetaData(cf_handle, &metadata); - cf_handle->GetDescriptor(&cf_descr); - c_options.output_file_size_limit = cf_descr.options.target_file_size_base; + for (i = 0; i < max_attempts; i++) { + rdb->GetColumnFamilyMetaData(cf_handle, &metadata); + cf_handle->GetDescriptor(&cf_descr); + c_options.output_file_size_limit = cf_descr.options.target_file_size_base; - DBUG_ASSERT(metadata.levels[0].level == 0); - std::vector file_names; - for (auto &file : 
metadata.levels[0].files) { - file_names.emplace_back(file.db_path + file.name); - } + DBUG_ASSERT(metadata.levels[0].level == 0); + std::vector file_names; + for (auto &file : metadata.levels[0].files) { + file_names.emplace_back(file.db_path + file.name); + } + + if (file_names.empty()) { + break; + } - if (!file_names.empty()) { rocksdb::Status s; s = rdb->CompactFiles(c_options, cf_handle, file_names, 1); + // Due to a race, it's possible for CompactFiles to collide + // with auto compaction, causing an error to return + // regarding file not found. In that case, retry. + if (s.IsInvalidArgument()) { + continue; + } + if (!s.ok() && !s.IsAborted()) { rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); return HA_EXIT_FAILURE; } + break; + } + if (i == max_attempts) { + num_errors++; } } - return HA_EXIT_SUCCESS; + return num_errors == 0 ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE; } static void rocksdb_drop_index_wakeup_thread( @@ -468,11 +550,9 @@ static void rocksdb_set_update_cf_options(THD *thd, struct st_mysql_sys_var *var, void *var_ptr, const void *save); -static int rocksdb_check_bulk_load(THD *const thd, - struct st_mysql_sys_var *var - MY_ATTRIBUTE((__unused__)), - void *save, - struct st_mysql_value *value); +static int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); static int rocksdb_check_bulk_load_allow_unsorted( THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), @@ -499,6 +579,8 @@ static int rocksdb_validate_set_block_cache_size( static long long rocksdb_block_cache_size; static long long rocksdb_sim_cache_size; static my_bool rocksdb_use_clock_cache; +static double rocksdb_cache_high_pri_pool_ratio; +static my_bool rocksdb_cache_dump; /* Use unsigned long long instead of uint64_t because of MySQL compatibility */ static unsigned long long // NOLINT(runtime/int) rocksdb_rate_limiter_bytes_per_sec; @@ -518,8 +600,10 @@ static my_bool 
rocksdb_force_compute_memtable_stats; static uint32_t rocksdb_force_compute_memtable_stats_cachetime; static my_bool rocksdb_debug_optimizer_no_zero_cardinality; static uint32_t rocksdb_wal_recovery_mode; +static uint32_t rocksdb_stats_level; static uint32_t rocksdb_access_hint_on_compaction_start; static char *rocksdb_compact_cf_name; +static char *rocksdb_delete_cf_name; static char *rocksdb_checkpoint_name; static my_bool rocksdb_signal_drop_index_thread; static my_bool rocksdb_signal_remove_mariabackup_checkpoint; @@ -555,10 +639,25 @@ char *compression_types_val= const_cast(get_rocksdb_supported_compression_types()); static unsigned long rocksdb_write_policy = rocksdb::TxnDBWritePolicy::WRITE_COMMITTED; + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +char *rocksdb_read_free_rpl_tables; +std::mutex rocksdb_read_free_rpl_tables_mutex; +#if defined(HAVE_PSI_INTERFACE) +Regex_list_handler rdb_read_free_regex_handler(key_rwlock_read_free_rpl_tables); +#else +Regex_list_handler rdb_read_free_regex_handler; +#endif +enum read_free_rpl_type { OFF = 0, PK_ONLY, PK_SK }; +static unsigned long rocksdb_read_free_rpl = read_free_rpl_type::OFF; +#endif + static my_bool rocksdb_error_on_suboptimal_collation = 1; static uint32_t rocksdb_stats_recalc_rate = 0; static uint32_t rocksdb_debug_manual_compaction_delay = 0; static uint32_t rocksdb_max_manual_compactions = 0; +static my_bool rocksdb_rollback_on_timeout = FALSE; +static my_bool rocksdb_enable_insert_with_update_caching = TRUE; std::atomic rocksdb_row_lock_deadlocks(0); std::atomic rocksdb_row_lock_wait_timeouts(0); @@ -566,6 +665,9 @@ std::atomic rocksdb_snapshot_conflict_errors(0); std::atomic rocksdb_wal_group_syncs(0); std::atomic rocksdb_manual_compactions_processed(0); std::atomic rocksdb_manual_compactions_running(0); +#ifndef DBUG_OFF +std::atomic rocksdb_num_get_for_update_calls(0); +#endif @@ -635,7 +737,7 @@ static std::unique_ptr rdb_init_rocksdb_db_options(void) { 
o->listeners.push_back(std::make_shared(&ddl_manager)); o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; - o->max_open_files = -2; // auto-tune to 50% open_files_limit + o->max_open_files = -2; // auto-tune to 50% open_files_limit o->two_write_queues = true; o->manual_wal_flush = true; @@ -659,6 +761,15 @@ static TYPELIB write_policy_typelib = {array_elements(write_policy_names) - 1, "write_policy_typelib", write_policy_names, nullptr}; +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +/* This array needs to be kept up to date with myrocks::read_free_rpl_type */ +static const char *read_free_rpl_names[] = {"OFF", "PK_ONLY", "PK_SK", NullS}; + +static TYPELIB read_free_rpl_typelib = {array_elements(read_free_rpl_names) - 1, + "read_free_rpl_typelib", + read_free_rpl_names, nullptr}; +#endif + /* This enum needs to be kept up to date with rocksdb::InfoLogLevel */ static const char *info_log_level_names[] = {"debug_level", "info_level", "warn_level", "error_level", @@ -680,6 +791,23 @@ static void rocksdb_set_rocksdb_info_log_level( RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } +static void rocksdb_set_rocksdb_stats_level(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + rocksdb_db_options->statistics->set_stats_level( + static_cast( + *static_cast(save))); + // Actual stats level is defined at rocksdb dbopt::statistics::stats_level_ + // so adjusting rocksdb_stats_level here to make sure it points to + // the correct stats level. 
+ rocksdb_stats_level = rocksdb_db_options->statistics->get_stats_level(); + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + static void rocksdb_set_reset_stats( my_core::THD *const /* unused */, my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), @@ -804,7 +932,7 @@ static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG, static MYSQL_THDVAR_BOOL( commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG, "TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr, - nullptr, FALSE); + nullptr, TRUE); static MYSQL_THDVAR_BOOL( trace_sst_api, PLUGIN_VAR_RQCMDARG, @@ -844,10 +972,11 @@ static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, "Directory for temporary files during DDL operations.", nullptr, nullptr, ""); +#define DEFAULT_SKIP_UNIQUE_CHECK_TABLES ".*" static MYSQL_THDVAR_STR( skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, "Skip unique constraint checking for the specified tables", nullptr, - nullptr, ".*"); + nullptr, DEFAULT_SKIP_UNIQUE_CHECK_TABLES); static MYSQL_THDVAR_BOOL( commit_in_the_middle, PLUGIN_VAR_RQCMDARG, @@ -861,11 +990,83 @@ static MYSQL_THDVAR_BOOL( " Blind delete is disabled if the table has secondary key", nullptr, nullptr, FALSE); -static MYSQL_THDVAR_STR( - read_free_rpl_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + +static const char *DEFAULT_READ_FREE_RPL_TABLES = ".*"; + +static int rocksdb_validate_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *save, + struct st_mysql_value *value) { + char buff[STRING_BUFFER_USUAL_SIZE]; + int length = sizeof(buff); + const char *wlist_buf = value->val_str(value, buff, &length); + const auto wlist = wlist_buf ? 
wlist_buf : DEFAULT_READ_FREE_RPL_TABLES; + +#if defined(HAVE_PSI_INTERFACE) + Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables); +#else + Regex_list_handler regex_handler; +#endif + + if (!regex_handler.set_patterns(wlist)) { + warn_about_bad_patterns(&regex_handler, "rocksdb_read_free_rpl_tables"); + return HA_EXIT_FAILURE; + } + + *static_cast(save) = my_strdup(wlist, MYF(MY_WME)); + return HA_EXIT_SUCCESS; +} + +static void rocksdb_update_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr, + const void *save) { + const auto wlist = *static_cast(save); + DBUG_ASSERT(wlist != nullptr); + + // This is bound to succeed since we've already checked for bad patterns in + // rocksdb_validate_read_free_rpl_tables + rdb_read_free_regex_handler.set_patterns(wlist); + + // update all table defs + struct Rdb_read_free_rpl_updater : public Rdb_tables_scanner { + int add_table(Rdb_tbl_def *tdef) override { + tdef->check_and_set_read_free_rpl_table(); + return HA_EXIT_SUCCESS; + } + } updater; + ddl_manager.scan_for_tables(&updater); + + if (wlist == DEFAULT_READ_FREE_RPL_TABLES) { + // If running SET var = DEFAULT, then rocksdb_validate_read_free_rpl_tables + // isn't called, and memory is never allocated for the value. Allocate it + // here. + *static_cast(var_ptr) = my_strdup(wlist, MYF(MY_WME)); + } else { + // Otherwise, we just reuse the value allocated from + // rocksdb_validate_read_free_rpl_tables. + *static_cast(var_ptr) = wlist; + } +} + +static MYSQL_SYSVAR_STR( + read_free_rpl_tables, rocksdb_read_free_rpl_tables, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /*| PLUGIN_VAR_ALLOCATED*/, "List of tables that will use read-free replication on the slave " "(i.e. 
not lookup a row during replication)", - nullptr, nullptr, ""); + rocksdb_validate_read_free_rpl_tables, rocksdb_update_read_free_rpl_tables, + DEFAULT_READ_FREE_RPL_TABLES); + +static MYSQL_SYSVAR_ENUM( + read_free_rpl, rocksdb_read_free_rpl, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "Use read-free replication on the slave (i.e. no row lookup during " + "replication). Default is OFF, PK_SK will enable it on all tables with " + "primary key. PK_ONLY will enable it on tables where the only key is the " + "primary key (i.e. no secondary keys).", + nullptr, nullptr, read_free_rpl_type::OFF, &read_free_rpl_typelib); +#endif static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG, "Skip using bloom filter for reads", nullptr, nullptr, @@ -1033,6 +1234,14 @@ static MYSQL_SYSVAR_UINT( /* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords, /* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0); +static MYSQL_SYSVAR_UINT( + stats_level, rocksdb_stats_level, PLUGIN_VAR_RQCMDARG, + "Statistics Level for RocksDB. 
Default is 0 (kExceptHistogramOrTimers)", + nullptr, rocksdb_set_rocksdb_stats_level, + /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* max */ (uint)rocksdb::StatsLevel::kAll, 0); + static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size, rocksdb_db_options->compaction_readahead_size, PLUGIN_VAR_RQCMDARG, @@ -1107,7 +1316,8 @@ static MYSQL_SYSVAR_ULONG( persistent_cache_size_mb, rocksdb_persistent_cache_size_mb, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Size of cache in MB for BlockBasedTableOptions::persistent_cache " - "for RocksDB", nullptr, nullptr, rocksdb_persistent_cache_size_mb, + "for RocksDB", + nullptr, nullptr, rocksdb_persistent_cache_size_mb, /* min */ 0L, /* max */ ULONG_MAX, 0); static MYSQL_SYSVAR_UINT64_T( @@ -1286,7 +1496,7 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size, rocksdb_validate_set_block_cache_size, nullptr, /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE, /* min */ RDB_MIN_BLOCK_CACHE_SIZE, - /* max */ LONGLONG_MAX, + /* max */ LLONG_MAX, /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, @@ -1295,15 +1505,26 @@ static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, nullptr, /* default */ 0, /* min */ 0, - /* max */ LONGLONG_MAX, + /* max */ LLONG_MAX, /* Block size */ 0); static MYSQL_SYSVAR_BOOL( - use_clock_cache, - rocksdb_use_clock_cache, + use_clock_cache, rocksdb_use_clock_cache, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Use ClockCache instead of default LRUCache for RocksDB", - nullptr, nullptr, false); + "Use ClockCache instead of default LRUCache for RocksDB", nullptr, nullptr, + false); + +static MYSQL_SYSVAR_BOOL(cache_dump, rocksdb_cache_dump, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Include RocksDB block cache content in core dump.", + nullptr, nullptr, true); + +static MYSQL_SYSVAR_DOUBLE(cache_high_pri_pool_ratio, + 
rocksdb_cache_high_pri_pool_ratio, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Specify the size of block cache high-pri pool", + nullptr, nullptr, /* default */ 0.0, /* min */ 0.0, + /* max */ 1.0, 0); static MYSQL_SYSVAR_BOOL( cache_index_and_filter_blocks, @@ -1313,6 +1534,14 @@ static MYSQL_SYSVAR_BOOL( "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB", nullptr, nullptr, true); +static MYSQL_SYSVAR_BOOL( + cache_index_and_filter_with_high_priority, + *reinterpret_cast( + &rocksdb_tbl_options->cache_index_and_filter_blocks_with_high_priority), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "cache_index_and_filter_blocks_with_high_priority for RocksDB", nullptr, + nullptr, true); + // When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the // LRU cache, but will always keep the filter & idndex block's handle checked // out (=won't call ShardedLRUCache::Release), plus the parsed out objects @@ -1441,10 +1670,10 @@ static MYSQL_SYSVAR_UINT( nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0); static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats, - rocksdb_force_compute_memtable_stats, - PLUGIN_VAR_RQCMDARG, - "Force to always compute memtable stats", - nullptr, nullptr, TRUE); + rocksdb_force_compute_memtable_stats, + PLUGIN_VAR_RQCMDARG, + "Force to always compute memtable stats", nullptr, + nullptr, TRUE); static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime, rocksdb_force_compute_memtable_stats_cachetime, @@ -1464,6 +1693,10 @@ static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name, rocksdb_compact_column_family, rocksdb_compact_column_family_stub, ""); +static MYSQL_SYSVAR_STR(delete_cf, rocksdb_delete_cf_name, PLUGIN_VAR_RQCMDARG, + "Delete column family", rocksdb_delete_column_family, + rocksdb_delete_column_family_stub, ""); + static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name, PLUGIN_VAR_RQCMDARG, "Checkpoint directory", rocksdb_create_checkpoint, @@ -1535,6 +1768,12 @@ static 
MYSQL_SYSVAR_UINT( "Maximum number of pending + ongoing number of manual compactions.", nullptr, nullptr, /* default */ 10, /* min */ 0, /* max */ UINT_MAX, 0); +static MYSQL_SYSVAR_BOOL( + rollback_on_timeout, rocksdb_rollback_on_timeout, PLUGIN_VAR_OPCMDARG, + "Whether to roll back the complete transaction or a single statement on " + "lock wait timeout (a single statement by default)", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_UINT( debug_manual_compaction_delay, rocksdb_debug_manual_compaction_delay, PLUGIN_VAR_RQCMDARG, @@ -1626,7 +1865,7 @@ static MYSQL_SYSVAR_LONGLONG( rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG, "Minimum file size required for compaction_sequential_deletes", nullptr, rocksdb_set_compaction_options, 0L, - /* min */ -1L, /* max */ LONGLONG_MAX, 0); + /* min */ -1L, /* max */ LLONG_MAX, 0); static MYSQL_SYSVAR_BOOL( compaction_sequential_deletes_count_sd, @@ -1731,6 +1970,13 @@ static MYSQL_SYSVAR_BOOL(error_on_suboptimal_collation, "collation is used", nullptr, nullptr, TRUE); +static MYSQL_SYSVAR_BOOL( + enable_insert_with_update_caching, + rocksdb_enable_insert_with_update_caching, PLUGIN_VAR_OPCMDARG, + "Whether to enable optimization where we cache the read from a failed " + "insertion attempt in INSERT ON DUPLICATE KEY UPDATE", + nullptr, nullptr, TRUE); + static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { @@ -1748,7 +1994,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(trace_sst_api), MYSQL_SYSVAR(commit_in_the_middle), MYSQL_SYSVAR(blind_delete_primary_key), +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported MYSQL_SYSVAR(read_free_rpl_tables), + MYSQL_SYSVAR(read_free_rpl), +#endif MYSQL_SYSVAR(bulk_load_size), MYSQL_SYSVAR(merge_buf_size), MYSQL_SYSVAR(enable_bulk_load_api), @@ -1800,6 +2049,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { 
MYSQL_SYSVAR(enable_thread_tracking), MYSQL_SYSVAR(perf_context_level), MYSQL_SYSVAR(wal_recovery_mode), + MYSQL_SYSVAR(stats_level), MYSQL_SYSVAR(access_hint_on_compaction_start), MYSQL_SYSVAR(new_table_reader_for_compaction_inputs), MYSQL_SYSVAR(compaction_readahead_size), @@ -1809,7 +2059,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(block_cache_size), MYSQL_SYSVAR(sim_cache_size), MYSQL_SYSVAR(use_clock_cache), + MYSQL_SYSVAR(cache_high_pri_pool_ratio), + MYSQL_SYSVAR(cache_dump), MYSQL_SYSVAR(cache_index_and_filter_blocks), + MYSQL_SYSVAR(cache_index_and_filter_with_high_priority), MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), MYSQL_SYSVAR(index_type), MYSQL_SYSVAR(hash_index_allow_collision), @@ -1838,6 +2091,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), MYSQL_SYSVAR(compact_cf), + MYSQL_SYSVAR(delete_cf), MYSQL_SYSVAR(signal_drop_index_thread), MYSQL_SYSVAR(pause_background_work), MYSQL_SYSVAR(enable_2pc), @@ -1883,10 +2137,13 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_manual_compaction_delay), MYSQL_SYSVAR(max_manual_compactions), MYSQL_SYSVAR(manual_compaction_threads), + MYSQL_SYSVAR(rollback_on_timeout), + + MYSQL_SYSVAR(enable_insert_with_update_caching), nullptr}; -static rocksdb::WriteOptions -rdb_get_rocksdb_write_options(my_core::THD *const thd) { +static rocksdb::WriteOptions rdb_get_rocksdb_write_options( + my_core::THD *const thd) { rocksdb::WriteOptions opt; opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); @@ -1943,19 +2200,6 @@ static int rocksdb_compact_column_family(THD *const thd, /////////////////////////////////////////////////////////////////////////////////////////// -/** - @brief - Function we use in the creation of our hash to get key. 
-*/ - -uchar * -Rdb_open_tables_map::get_hash_key(const Rdb_table_handler *const table_handler, - size_t *const length, - my_bool not_used MY_ATTRIBUTE((__unused__))) { - *length = table_handler->m_table_name_length; - return reinterpret_cast(table_handler->m_table_name); -} - /* Drop index thread's control */ @@ -2012,7 +2256,7 @@ class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier { void SnapshotCreated(const rocksdb::Snapshot *snapshot) override; -public: + public: Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete; Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete; @@ -2046,9 +2290,9 @@ String timeout_message(const char *command, const char *name1, /* This is the base class for transactions when interacting with rocksdb. -*/ + */ class Rdb_transaction { -protected: + protected: ulonglong m_write_count = 0; ulonglong m_insert_count = 0; ulonglong m_update_count = 0; @@ -2059,7 +2303,7 @@ protected: bool m_is_delayed_snapshot = false; bool m_is_two_phase = false; -private: + private: /* Number of write operations this transaction had when we took the last savepoint (the idea is not to take another savepoint if we haven't made @@ -2067,7 +2311,7 @@ private: */ ulonglong m_writes_at_last_savepoint; -protected: + protected: protected: THD *m_thd = nullptr; @@ -2092,9 +2336,9 @@ protected: // This should be used only when updating binlog information. 
virtual rocksdb::WriteBatchBase *get_write_batch() = 0; virtual bool commit_no_binlog() = 0; - virtual rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *column_family) = 0; + virtual rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *column_family) = 0; protected: /* @@ -2139,7 +2383,9 @@ protected: String m_detailed_error; int64_t m_snapshot_timestamp = 0; bool m_ddl_transaction; +#ifdef MARIAROCKS_NOT_YET std::shared_ptr m_explicit_snapshot; +#endif /* Tracks the number of tables in use through external_lock. @@ -2173,8 +2419,9 @@ protected: RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex); - for (auto it : s_tx_list) + for (auto it : s_tx_list) { walker->process_tran(it); + } RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex); } @@ -2194,7 +2441,8 @@ protected: convert_error_code_to_mysql() does: force a statement rollback before returning HA_ERR_LOCK_WAIT_TIMEOUT: */ - my_core::thd_mark_transaction_to_rollback(thd, false /*just statement*/); + my_core::thd_mark_transaction_to_rollback( + thd, static_cast(rocksdb_rollback_on_timeout)); m_detailed_error.copy(timeout_message( "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); table_handler->m_lock_wait_timeout_counter.inc(); @@ -2216,9 +2464,10 @@ protected: char user_host_buff[MAX_USER_HOST_SIZE + 1]; make_user_name(thd, user_host_buff); // NO_LINT_DEBUG - sql_print_warning("Got snapshot conflict errors: User: %s " - "Query: %s", - user_host_buff, thd->query()); + sql_print_warning( + "Got snapshot conflict errors: User: %s " + "Query: %s", + user_host_buff, thd->query()); } m_detailed_error = String(" (snapshot conflict)", system_charset_info); table_handler->m_deadlock_counter.inc(); @@ -2315,8 +2564,9 @@ protected: if (m_is_tx_failed) { rollback(); res = false; - } else + } else { res = commit(); + } return res; } @@ -2367,7 +2617,7 @@ protected: bool has_snapshot() const { return m_read_opts.snapshot != 
nullptr; } -private: + private: // The Rdb_sst_info structures we are currently loading. In a partitioned // table this can have more than one entry std::vector> m_curr_bulk_load; @@ -2376,7 +2626,7 @@ private: /* External merge sorts for bulk load: key ID -> merge sort instance */ std::unordered_map m_key_merge; -public: + public: int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf, Rdb_index_merge **key_merge) { int res; @@ -2397,22 +2647,62 @@ public: return HA_EXIT_SUCCESS; } - int finish_bulk_load(int print_client_error = true) { - int rc = 0, rc2; + /* Finish bulk loading for all table handlers belongs to one connection */ + int finish_bulk_load(bool *is_critical_error = nullptr, + int print_client_error = true) { + Ensure_cleanup cleanup([&]() { + // Always clear everything regardless of success/failure + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); + m_key_merge.clear(); + }); - std::vector>::iterator it; - for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) { - rc2 = (*it)->commit(print_client_error); - if (rc2 != 0 && rc == 0) { + int rc = 0; + if (is_critical_error) { + *is_critical_error = true; + } + + // PREPARE phase: finish all on-going bulk loading Rdb_sst_info and + // collect all Rdb_sst_commit_info containing (SST files, cf) + int rc2 = 0; + std::vector sst_commit_list; + sst_commit_list.reserve(m_curr_bulk_load.size()); + + for (auto &sst_info : m_curr_bulk_load) { + Rdb_sst_info::Rdb_sst_commit_info commit_info; + + // Commit the list of SST files and move it to the end of + // sst_commit_list, effectively transfer the ownership over + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 && rc == 0) { + // Don't return yet - make sure we finish all the SST infos rc = rc2; } - } - m_curr_bulk_load.clear(); - m_curr_bulk_load_tablename.clear(); - DBUG_ASSERT(m_curr_bulk_load.size() == 0); - // Flush the index_merge sort buffers + // Make sure we have work to do - we might 
be losing the race + if (rc2 == 0 && commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); + } + } + + if (rc) { + return rc; + } + + // MERGING Phase: Flush the index_merge sort buffers into SST files in + // Rdb_sst_info and collect all Rdb_sst_commit_info containing + // (SST files, cf) if (!m_key_merge.empty()) { + Ensure_cleanup malloc_cleanup([]() { + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this + point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + }); + rocksdb::Slice merge_key; rocksdb::Slice merge_val; for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { @@ -2429,9 +2719,20 @@ public: // be missed by the compaction filter and not be marked for // removal. It is unclear how to lock the sql table from the storage // engine to prevent modifications to it while bulk load is occurring. - if (keydef == nullptr || table_name.empty()) { - rc2 = HA_ERR_ROCKSDB_BULK_LOAD; - break; + if (keydef == nullptr) { + if (is_critical_error) { + // We used to set the error but simply ignores it. This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_KEY_NOT_FOUND; + } else if (table_name.empty()) { + if (is_critical_error) { + // We used to set the error but simply ignores it. 
This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_NO_SUCH_TABLE; } const std::string &index_name = keydef->get_name(); Rdb_index_merge &rdb_merge = it->second; @@ -2440,38 +2741,112 @@ public: // "./database/table" std::replace(table_name.begin(), table_name.end(), '.', '/'); table_name = "./" + table_name; - Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(), - *rocksdb_db_options, - THDVAR(get_thd(), trace_sst_api)); + auto sst_info = std::make_shared( + rdb, table_name, index_name, rdb_merge.get_cf(), + *rocksdb_db_options, THDVAR(get_thd(), trace_sst_api)); while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) { - if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) { + if ((rc2 = sst_info->put(merge_key, merge_val)) != 0) { + rc = rc2; + + // Don't return yet - make sure we finish the sst_info break; } } - // rc2 == -1 => finished ok; rc2 > 0 => error - if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) { - if (rc == 0) { - rc = rc2; - } - break; + // -1 => no more items + if (rc2 != -1 && rc != 0) { + rc = rc2; + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 != 0 && rc == 0) { + // Only set the error from sst_info->finish if finish failed and we + // didn't fail before. In other words, we don't have finish's + // success mask earlier failures + rc = rc2; + } + + if (rc) { + return rc; + } + + if (commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); } } - m_key_merge.clear(); - - /* - Explicitly tell jemalloc to clean up any unused dirty pages at this - point. - See https://reviews.facebook.net/D63723 for more details. 
- */ - purge_all_jemalloc_arenas(); } + + // Early return in case we lost the race completely and end up with no + // work at all + if (sst_commit_list.size() == 0) { + return rc; + } + + // INGEST phase: Group all Rdb_sst_commit_info by cf (as they might + // have the same cf across different indexes) and call out to RocksDB + // to ingest all SST files in one atomic operation + rocksdb::IngestExternalFileOptions options; + options.move_files = true; + options.snapshot_consistency = false; + options.allow_global_seqno = false; + options.allow_blocking_flush = false; + + std::map + arg_map; + + // Group by column_family + for (auto &commit_info : sst_commit_list) { + if (arg_map.find(commit_info.get_cf()) == arg_map.end()) { + rocksdb::IngestExternalFileArg arg; + arg.column_family = commit_info.get_cf(), + arg.external_files = commit_info.get_committed_files(), + arg.options = options; + + arg_map.emplace(commit_info.get_cf(), arg); + } else { + auto &files = arg_map[commit_info.get_cf()].external_files; + files.insert(files.end(), commit_info.get_committed_files().begin(), + commit_info.get_committed_files().end()); + } + } + + std::vector args; + size_t file_count = 0; + for (auto &cf_files_pair : arg_map) { + args.push_back(cf_files_pair.second); + file_count += cf_files_pair.second.external_files.size(); + } + + const rocksdb::Status s = rdb->IngestExternalFiles(args); + if (THDVAR(m_thd, trace_sst_api)) { + // NO_LINT_DEBUG + sql_print_information( + "SST Tracing: IngestExternalFile '%zu' files returned %s", file_count, + s.ok() ? "ok" : "not ok"); + } + + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + return HA_ERR_ROCKSDB_BULK_LOAD; + } + + // COMMIT phase: mark everything as completed. This avoids SST file + // deletion kicking in. 
Otherwise SST files would get deleted if this + // entire operation is aborted + for (auto &commit_info : sst_commit_list) { + commit_info.commit(); + } + return rc; } int start_bulk_load(ha_rocksdb *const bulk_load, - std::shared_ptr sst_info) { + std::shared_ptr sst_info) { /* If we already have an open bulk load of a table and the name doesn't match the current one, close out the currently running one. This allows @@ -2484,8 +2859,6 @@ public: bulk_load->get_table_basename() != m_curr_bulk_load_tablename) { const auto res = finish_bulk_load(); if (res != HA_EXIT_SUCCESS) { - m_curr_bulk_load.clear(); - m_curr_bulk_load_tablename.clear(); return res; } } @@ -2535,12 +2908,10 @@ public: inserts while inside a multi-statement transaction. */ bool flush_batch() { - if (get_write_count() == 0) - return false; + if (get_write_count() == 0) return false; /* Commit the current transaction */ - if (commit_no_binlog()) - return true; + if (commit_no_binlog()) return true; /* Start another one */ start_tx(); @@ -2552,7 +2923,7 @@ public: std::max(m_auto_incr_map[gl_index_id], curr_id); } -#ifndef NDEBUG +#ifndef DBUG_OFF ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) { if (m_auto_incr_map.count(gl_index_id) > 0) { return m_auto_incr_map[gl_index_id]; @@ -2563,13 +2934,14 @@ public: virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - const rocksdb::Slice &value) = 0; - virtual rocksdb::Status - delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) = 0; - virtual rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) = 0; + const rocksdb::Slice &value, + const bool assume_tracked) = 0; + virtual rocksdb::Status delete_key( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; + virtual rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const 
column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; virtual bool has_modifications() const = 0; @@ -2585,25 +2957,23 @@ public: virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const = 0; - virtual rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) = 0; + virtual rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) = 0; - rocksdb::Iterator * - get_iterator(rocksdb::ColumnFamilyHandle *const column_family, - bool skip_bloom_filter, bool fill_cache, - const rocksdb::Slice &eq_cond_lower_bound, - const rocksdb::Slice &eq_cond_upper_bound, - bool read_current = false, bool create_snapshot = true) { + rocksdb::Iterator *get_iterator( + rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter, + bool fill_cache, const rocksdb::Slice &eq_cond_lower_bound, + const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false, + bool create_snapshot = true) { // Make sure we are not doing both read_current (which implies we don't // want a snapshot) and create_snapshot which makes sure we create // a snapshot DBUG_ASSERT(column_family != nullptr); DBUG_ASSERT(!read_current || !create_snapshot); - if (create_snapshot) - acquire_snapshot(true); + if (create_snapshot) acquire_snapshot(true); rocksdb::ReadOptions options = m_read_opts; @@ -2635,25 +3005,33 @@ public: entire transaction. */ do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; + m_writes_at_last_savepoint = m_write_count; } /* Called when a "top-level" statement inside a transaction completes successfully and its changes become part of the transaction's changes. 
*/ - void make_stmt_savepoint_permanent() { - + int make_stmt_savepoint_permanent() { // Take another RocksDB savepoint only if we had changes since the last // one. This is very important for long transactions doing lots of // SELECTs. - if (m_writes_at_last_savepoint != m_write_count) - { - do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; - } - } + if (m_writes_at_last_savepoint != m_write_count) { + rocksdb::WriteBatchBase *batch = get_write_batch(); + rocksdb::Status status = rocksdb::Status::NotFound(); + while ((status = batch->PopSavePoint()) == rocksdb::Status::OK()) { + } + if (status != rocksdb::Status::NotFound()) { + return HA_EXIT_FAILURE; + } + + do_set_savepoint(); + m_writes_at_last_savepoint = m_write_count; + } + + return HA_EXIT_SUCCESS; + } /* Rollback to the savepoint we've set before the last statement @@ -2669,7 +3047,7 @@ public: statement start) because setting a savepoint is cheap. */ do_set_savepoint(); - m_writes_at_last_savepoint= m_write_count; + m_writes_at_last_savepoint = m_write_count; } } @@ -2733,10 +3111,11 @@ class Rdb_transaction_impl : public Rdb_transaction { rocksdb::Transaction *m_rocksdb_tx = nullptr; rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr; -public: + public: void set_lock_timeout(int timeout_sec_arg) override { - if (m_rocksdb_tx) + if (m_rocksdb_tx) { m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec)); + } } void set_sync(bool sync) override { @@ -2753,7 +3132,7 @@ public: virtual bool is_writebatch_trx() const override { return false; } -private: + private: void release_tx(void) { // We are done with the current active transaction object. Preserve it // for later reuse. 
@@ -2803,7 +3182,7 @@ private: goto error; } -error: + error: /* Save the transaction object to be reused */ release_tx(); @@ -2817,7 +3196,7 @@ error: return res; } -public: + public: void rollback() override { m_write_count = 0; m_insert_count = 0; @@ -2884,39 +3263,42 @@ public: m_read_opts.snapshot = nullptr; } - if (need_clear && m_rocksdb_tx != nullptr) - m_rocksdb_tx->ClearSnapshot(); + if (need_clear && m_rocksdb_tx != nullptr) m_rocksdb_tx->ClearSnapshot(); } bool has_snapshot() { return m_read_opts.snapshot != nullptr; } rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - const rocksdb::Slice &value) override { + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { ++m_write_count; ++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->Put(column_family, key, value); + } + return m_rocksdb_tx->Put(column_family, key, value, assume_tracked); } rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + const rocksdb::Slice &key, + const bool assume_tracked) override { ++m_write_count; ++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->Delete(column_family, key); + } + return m_rocksdb_tx->Delete(column_family, key, assume_tracked); } - rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) override { ++m_write_count; 
++m_lock_count; - if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); - return m_rocksdb_tx->SingleDelete(column_family, key); + } + return m_rocksdb_tx->SingleDelete(column_family, key, assume_tracked); } bool has_modifications() const override { @@ -2952,23 +3334,39 @@ public: return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); } - rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) override { - if (++m_lock_count > m_max_row_locks) + rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) override { + if (++m_lock_count > m_max_row_locks) { return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } if (value != nullptr) { value->Reset(); } - return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, - exclusive); + rocksdb::Status s; + // If snapshot is null, pass it to GetForUpdate and snapshot is + // initialized there. Snapshot validation is skipped in that case. + if (m_read_opts.snapshot == nullptr || do_validate) { + s = m_rocksdb_tx->GetForUpdate( + m_read_opts, column_family, key, value, exclusive, + m_read_opts.snapshot ? 
do_validate : false); + } else { + // If snapshot is set, and if skipping validation, + // call GetForUpdate without validation and set back old snapshot + auto saved_snapshot = m_read_opts.snapshot; + m_read_opts.snapshot = nullptr; + s = m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, + exclusive, false); + m_read_opts.snapshot = saved_snapshot; + } + return s; } - rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *const column_family) override { + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const column_family) override { global_stats.queries[QUERIES_RANGE].inc(); return m_rocksdb_tx->GetIterator(options, column_family); } @@ -3013,10 +3411,9 @@ public: m_ddl_transaction = false; } - /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints */ - void do_set_savepoint() override { - m_rocksdb_tx->SetSavePoint(); - } + /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints + */ + void do_set_savepoint() override { m_rocksdb_tx->SetSavePoint(); } void do_rollback_to_savepoint() override { m_rocksdb_tx->RollbackToSavePoint(); @@ -3048,14 +3445,14 @@ public: const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot(); if (org_snapshot != cur_snapshot) { - if (org_snapshot != nullptr) - m_snapshot_timestamp = 0; + if (org_snapshot != nullptr) m_snapshot_timestamp = 0; m_read_opts.snapshot = cur_snapshot; - if (cur_snapshot != nullptr) + if (cur_snapshot != nullptr) { rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp); - else + } else { m_is_delayed_snapshot = true; + } } } } @@ -3066,7 +3463,7 @@ public: m_notifier = std::make_shared(this); } - virtual ~Rdb_transaction_impl() { + virtual ~Rdb_transaction_impl() override { rollback(); // Theoretically the notifier could outlive the Rdb_transaction_impl @@ -3098,7 +3495,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { 
m_ddl_transaction = false; } -private: + private: bool prepare(const rocksdb::TransactionName &name) override { return true; } bool commit_no_binlog() override { @@ -3122,7 +3519,7 @@ private: res = true; goto error; } -error: + error: reset(); m_write_count = 0; @@ -3135,16 +3532,12 @@ error: } /* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */ - void do_set_savepoint() override { - m_batch->SetSavePoint(); - } + void do_set_savepoint() override { m_batch->SetSavePoint(); } - void do_rollback_to_savepoint() override { - m_batch->RollbackToSavePoint(); - } + void do_rollback_to_savepoint() override { m_batch->RollbackToSavePoint(); } -public: + public: bool is_writebatch_trx() const override { return true; } void set_lock_timeout(int timeout_sec_arg) override { @@ -3172,8 +3565,7 @@ public: } void acquire_snapshot(bool acquire_now) override { - if (m_read_opts.snapshot == nullptr) - snapshot_created(rdb->GetSnapshot()); + if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot()); } void release_snapshot() override { @@ -3184,8 +3576,8 @@ public: } rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - const rocksdb::Slice &value) override { + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { ++m_write_count; m_batch->Put(column_family, key, value); // Note Put/Delete in write batch doesn't return any error code. 
We simply @@ -3194,15 +3586,16 @@ public: } rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + const rocksdb::Slice &key, + const bool assume_tracked) override { ++m_write_count; m_batch->Delete(column_family, key); return rocksdb::Status::OK(); } - rocksdb::Status - single_delete(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key) override { + rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool /* assume_tracked */) override { ++m_write_count; m_batch->SingleDelete(column_family, key); return rocksdb::Status::OK(); @@ -3227,10 +3620,10 @@ public: value); } - rocksdb::Status - get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, - bool exclusive) override { + rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool /* exclusive */, const bool /* do_validate */) override { if (value == nullptr) { rocksdb::PinnableSlice pin_val; rocksdb::Status s = get(column_family, key, &pin_val); @@ -3241,9 +3634,9 @@ public: return get(column_family, key, value); } - rocksdb::Iterator * - get_iterator(const rocksdb::ReadOptions &options, - rocksdb::ColumnFamilyHandle *const column_family) override { + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const /* column_family */) override { const auto it = rdb->NewIterator(options); return m_batch->NewIteratorWithBase(it); } @@ -3264,8 +3657,7 @@ public: void start_stmt() override {} void rollback_stmt() override { - if (m_batch) - rollback_to_stmt_savepoint(); + if (m_batch) rollback_to_stmt_savepoint(); } explicit Rdb_writebatch_impl(THD *const thd) @@ -3274,7 +3666,7 @@ public: true); } - virtual ~Rdb_writebatch_impl() { + virtual 
~Rdb_writebatch_impl() override { rollback(); delete m_batch; } @@ -3332,7 +3724,7 @@ class Rdb_perf_context_guard { } }; -} // anonymous namespace +} // anonymous namespace /* TODO: maybe, call this in external_lock() and store in ha_rocksdb.. @@ -3344,12 +3736,10 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { if (tx == nullptr) { bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET. if ((rpl_skip_tx_api && thd->rgi_slave) || - false /* MARIAROCKS_NOT_YET: THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)*/) + (THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)) { tx = new Rdb_writebatch_impl(thd); - } - else - { + } else { tx = new Rdb_transaction_impl(thd); } tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); @@ -3368,12 +3758,14 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { Rdb_transaction *tx = get_tx_from_thd(thd); if (tx != nullptr) { - int rc = tx->finish_bulk_load(false); - if (rc != 0) { + bool is_critical_error; + int rc = tx->finish_bulk_load(&is_critical_error, false); + if (rc != 0 && is_critical_error) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Error %d finalizing last SST file while " - "disconnecting", - rc); + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "disconnecting", + rc); } delete tx; @@ -3514,9 +3906,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) } DEBUG_SYNC(thd, "rocksdb.prepared"); - } - else + } else { tx->make_stmt_savepoint_permanent(); + } return HA_EXIT_SUCCESS; } @@ -3557,9 +3949,8 @@ static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) { DBUG_RETURN(HA_EXIT_SUCCESS); } -static int -rocksdb_rollback_by_xid(handlerton *const hton MY_ATTRIBUTE((__unused__)), - XID *const xid) { +static int rocksdb_rollback_by_xid( + handlerton *const hton MY_ATTRIBUTE((__unused__)), XID *const xid) { DBUG_ENTER_FUNC(); DBUG_ASSERT(hton 
!= nullptr); @@ -3605,6 +3996,7 @@ static void rdb_xid_from_string(const std::string &src, XID *const dst) { DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE); DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE); + memset(dst->data, 0, XIDDATASIZE); src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length), RDB_XIDHDR_LEN); } @@ -3629,13 +4021,16 @@ static int rocksdb_recover(handlerton* hton, XID* xid_list, uint len) if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) { memcpy(binlog_file, file_buf, FN_REFLEN + 1); *binlog_pos = pos; - fprintf(stderr, "RocksDB: Last binlog file position %llu," - " file name %s\n", + // NO_LINT_DEBUG + fprintf(stderr, + "RocksDB: Last binlog file position %llu," + " file name %s\n", pos, file_buf); if (*gtid_buf) { global_sid_lock->rdlock(); binlog_max_gtid->parse(global_sid_map, gtid_buf); global_sid_lock->unlock(); + // NO_LINT_DEBUG fprintf(stderr, "RocksDB: Last MySQL Gtid %s\n", gtid_buf); } } @@ -3733,8 +4128,8 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx) Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); if (tx != nullptr) { - if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN))) { + if (commit_tx || (!my_core::thd_test_options( + thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { /* This will not add anything to commit_latency_stats, and this is correct right? @@ -3865,7 +4260,7 @@ static std::string format_string(const char *const format, ...) { char *buff = static_buff; std::unique_ptr dynamic_buff = nullptr; - len++; // Add one for null terminator + len++; // Add one for null terminator // for longer output use an allocated buffer if (static_cast(len) > sizeof(static_buff)) { @@ -3890,7 +4285,7 @@ static std::string format_string(const char *const format, ...) 
{ } class Rdb_snapshot_status : public Rdb_tx_list_walker { -private: + private: std::string m_data; static std::string current_timestamp(void) { @@ -3924,9 +4319,8 @@ private: "=========================================\n"; } - static Rdb_deadlock_info::Rdb_dl_trx_info - get_dl_txn_info(const rocksdb::DeadlockInfo &txn, - const GL_INDEX_ID &gl_index_id) { + static Rdb_deadlock_info::Rdb_dl_trx_info get_dl_txn_info( + const rocksdb::DeadlockInfo &txn, const GL_INDEX_ID &gl_index_id) { Rdb_deadlock_info::Rdb_dl_trx_info txn_data; txn_data.trx_id = txn.m_txn_id; @@ -3953,13 +4347,12 @@ private: return txn_data; } - static Rdb_deadlock_info - get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) { + static Rdb_deadlock_info get_dl_path_trx_info( + const rocksdb::DeadlockPath &path_entry) { Rdb_deadlock_info deadlock_info; - for (auto it = path_entry.path.begin(); it != path_entry.path.end(); - it++) { - auto txn = *it; + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) { + const auto &txn = *it; const GL_INDEX_ID gl_index_id = { txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( txn.m_waiting_key.c_str()))}; @@ -3968,7 +4361,7 @@ private: DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); /* print the first txn in the path to display the full deadlock cycle */ if (!path_entry.path.empty() && !path_entry.limit_exceeded) { - auto deadlocking_txn = *(path_entry.path.end() - 1); + const auto &deadlocking_txn = *(path_entry.path.end() - 1); deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id; deadlock_info.deadlock_time = path_entry.deadlock_time; } @@ -3997,7 +4390,7 @@ private: #endif m_data += format_string( "---SNAPSHOT, ACTIVE %lld sec\n" - "%s\n" + "%s\n" "lock count %llu, write count %llu\n" "insert count %llu, update count %llu, delete count %llu\n", (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(), @@ -4010,19 +4403,21 @@ private: auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); m_data 
+= "----------LATEST DETECTED DEADLOCKS----------\n"; - for (auto path_entry : dlock_buffer) { + for (const auto &path_entry : dlock_buffer) { std::string path_data; if (path_entry.limit_exceeded) { path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; } else { - path_data += "\n*** DEADLOCK PATH\n" - "=========================================\n"; + path_data += + "\n*** DEADLOCK PATH\n" + "=========================================\n"; const auto dl_info = get_dl_path_trx_info(path_entry); const auto deadlock_time = dl_info.deadlock_time; for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) { - const auto trx_info = *it; + const auto &trx_info = *it; path_data += format_string( - "TIMESTAMP: %" PRId64 "\n" + "TIMESTAMP: %" PRId64 + "\n" "TRANSACTION ID: %u\n" "COLUMN FAMILY NAME: %s\n" "WAITING KEY: %s\n" @@ -4037,9 +4432,9 @@ private: path_data += "---------------WAITING FOR---------------\n"; } } - path_data += - format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", - dl_info.victim_trx_id); + path_data += format_string( + "\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", + dl_info.victim_trx_id); } m_data += path_data; } @@ -4048,7 +4443,7 @@ private: std::vector get_deadlock_info() { std::vector deadlock_info; auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); - for (auto path_entry : dlock_buffer) { + for (const auto &path_entry : dlock_buffer) { if (!path_entry.limit_exceeded) { deadlock_info.push_back(get_dl_path_trx_info(path_entry)); } @@ -4063,10 +4458,10 @@ private: * out relevant information for information_schema.rocksdb_trx */ class Rdb_trx_info_aggregator : public Rdb_tx_list_walker { -private: + private: std::vector *m_trx_info; -public: + public: explicit Rdb_trx_info_aggregator(std::vector *const trx_info) : m_trx_info(trx_info) {} @@ -4197,9 +4592,10 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, // sure that output will look unified. 
DBUG_ASSERT(commit_latency_stats != nullptr); - snprintf(buf, sizeof(buf), "rocksdb.commit_latency statistics " - "Percentiles :=> 50 : %.2f 95 : %.2f " - "99 : %.2f 100 : %.2f\n", + snprintf(buf, sizeof(buf), + "rocksdb.commit_latency statistics " + "Percentiles :=> 50 : %.2f 95 : %.2f " + "99 : %.2f 100 : %.2f\n", commit_latency_stats->Percentile(50), commit_latency_stats->Percentile(95), commit_latency_stats->Percentile(99), @@ -4221,7 +4617,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, } if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) { - snprintf(buf, sizeof(buf), "rocksdb.actual_delayed_write_rate " + snprintf(buf, sizeof(buf), "COUNT : %llu\n", (ulonglong)v); str.append(buf); @@ -4309,6 +4705,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list); if (!s.ok()) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n", s.ToString().c_str()); res |= true; @@ -4325,37 +4722,23 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, "\noperation_type: " + it.GetOperationName(it.operation_type) + "\noperation_stage: " + it.GetOperationStageName(it.operation_stage) + - "\nelapsed_time_ms: " + - it.MicrosToString(it.op_elapsed_micros); + "\nelapsed_time_ms: " + it.MicrosToString(it.op_elapsed_micros); - for (auto &it_props : - it.InterpretOperationProperties(it.operation_type, - it.op_properties)) { + for (auto &it_props : it.InterpretOperationProperties( + it.operation_type, it.op_properties)) { str += "\n" + it_props.first + ": " + std::to_string(it_props.second); } str += "\nstate_type: " + it.GetStateName(it.state_type); - res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), - str, stat_print); + res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), str, + stat_print); } } #ifdef MARIAROCKS_NOT_YET /* Explicit snapshot information */ - str.clear(); - 
{ - std::lock_guard lock(explicit_snapshot_mutex); - for (const auto &elem : explicit_snapshots) { - const auto &ss = elem.second.lock(); - DBUG_ASSERT(ss != nullptr); - const auto &info = ss->ss_info; - str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) + - "\nBinlog File: " + info.binlog_file + - "\nBinlog Pos: " + std::to_string(info.binlog_pos) + - "\nGtid Executed: " + info.gtid_executed + "\n"; - } - } + str = Rdb_explicit_snapshot::dump_snapshots(); #endif if (!str.empty()) { @@ -4390,38 +4773,38 @@ static bool rocksdb_explicit_snapshot( snapshot_info_st *ss_info) /*!< out: Snapshot information */ { switch (ss_info->op) { - case snapshot_operation::SNAPSHOT_CREATE: { - if (mysql_bin_log_is_open()) { - mysql_bin_log_lock_commits(ss_info); - } - auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot()); - if (mysql_bin_log_is_open()) { - mysql_bin_log_unlock_commits(ss_info); - } + case snapshot_operation::SNAPSHOT_CREATE: { + if (mysql_bin_log_is_open()) { + mysql_bin_log_lock_commits(ss_info); + } + auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot()); + if (mysql_bin_log_is_open()) { + mysql_bin_log_unlock_commits(ss_info); + } - thd->set_explicit_snapshot(s); - return s == nullptr; - } - case snapshot_operation::SNAPSHOT_ATTACH: { - auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id); - if (!s) { - return true; + thd->set_explicit_snapshot(s); + return s == nullptr; } - *ss_info = s->ss_info; - thd->set_explicit_snapshot(s); - return false; - } - case snapshot_operation::SNAPSHOT_RELEASE: { - if (!thd->get_explicit_snapshot()) { - return true; + case snapshot_operation::SNAPSHOT_ATTACH: { + auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id); + if (!s) { + return true; + } + *ss_info = s->ss_info; + thd->set_explicit_snapshot(s); + return false; } - *ss_info = thd->get_explicit_snapshot()->ss_info; - thd->set_explicit_snapshot(nullptr); - return false; - } - default: - DBUG_ASSERT(false); - return 
true; + case snapshot_operation::SNAPSHOT_RELEASE: { + if (!thd->get_explicit_snapshot()) { + return true; + } + *ss_info = thd->get_explicit_snapshot()->ss_info; + thd->set_explicit_snapshot(nullptr); + return false; + } + default: + DBUG_ASSERT(false); + return true; } return true; } @@ -4567,7 +4950,7 @@ static int rocksdb_start_tx_with_shared_read_view( // case: an explicit snapshot was not assigned to this transaction if (!tx->m_explicit_snapshot) { tx->m_explicit_snapshot = - Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot); + Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot); if (!tx->m_explicit_snapshot) { my_printf_error(ER_UNKNOWN_ERROR, "Could not create snapshot", MYF(0)); error = HA_EXIT_FAILURE; @@ -4611,9 +4994,8 @@ static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd, return tx->rollback_to_savepoint(savepoint); } -static bool -rocksdb_rollback_to_savepoint_can_release_mdl(handlerton *const hton, - THD *const thd) { +static bool rocksdb_rollback_to_savepoint_can_release_mdl( + handlerton *const /* hton */, THD *const /* thd */) { return true; } @@ -4661,7 +5043,7 @@ static void rocksdb_update_table_stats( /* Function needs to return void because of the interface and we've * detected an error which shouldn't happen. There's no way to let * caller know that something failed. 
- */ + */ SHIP_ASSERT(false); return; } @@ -4741,8 +5123,9 @@ static rocksdb::Status check_rocksdb_options_compatibility( } if (loaded_cf_descs.size() != cf_descr.size()) { - return rocksdb::Status::NotSupported("Mismatched size of column family " - "descriptors."); + return rocksdb::Status::NotSupported( + "Mismatched size of column family " + "descriptors."); } // Please see RocksDB documentation for more context about why we need to set @@ -4792,17 +5175,22 @@ static int rocksdb_init_func(void *const p) { } if (rdb_check_rocksdb_corruption()) { - sql_print_error("RocksDB: There was a corruption detected in RockDB files. " - "Check error log emitted earlier for more details."); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: There was a corruption detected in RockDB files. " + "Check error log emitted earlier for more details."); if (rocksdb_allow_to_start_after_corruption) { + // NO_LINT_DEBUG sql_print_information( "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent " "server operating if RocksDB corruption is detected."); } else { - sql_print_error("RocksDB: The server will exit normally and stop restart " - "attempts. Remove %s file from data directory and " - "start mysqld manually.", - rdb_corruption_marker_file_name().c_str()); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: The server will exit normally and stop restart " + "attempts. 
Remove %s file from data directory and " + "start mysqld manually.", + rdb_corruption_marker_file_name().c_str()); exit(0); } } @@ -4813,8 +5201,10 @@ static int rocksdb_init_func(void *const p) { init_rocksdb_psi_keys(); rocksdb_hton = (handlerton *)p; - mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &rdb_open_tables.m_mutex, - MY_MUTEX_INIT_FAST); + + rdb_open_tables.init(); + Ensure_cleanup rdb_open_tables_cleanup([]() { rdb_open_tables.free(); }); + #ifdef HAVE_PSI_INTERFACE rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key); rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key, @@ -4885,6 +5275,8 @@ static int rocksdb_init_func(void *const p) { /* Not needed in MariaDB: rocksdb_hton->flush_logs = rocksdb_flush_wal; + rocksdb_hton->handle_single_table_select = rocksdb_handle_single_table_select; + */ rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED | @@ -4894,16 +5286,25 @@ static int rocksdb_init_func(void *const p) { DBUG_ASSERT(!mysqld_embedded); if (rocksdb_db_options->max_open_files > (long)open_files_limit) { - sql_print_information("RocksDB: rocksdb_max_open_files should not be " - "greater than the open_files_limit, effective value " - "of rocksdb_max_open_files is being set to " - "open_files_limit / 2."); + // NO_LINT_DEBUG + sql_print_information( + "RocksDB: rocksdb_max_open_files should not be " + "greater than the open_files_limit, effective value " + "of rocksdb_max_open_files is being set to " + "open_files_limit / 2."); rocksdb_db_options->max_open_files = open_files_limit / 2; } else if (rocksdb_db_options->max_open_files == -2) { rocksdb_db_options->max_open_files = open_files_limit / 2; } +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + rdb_read_free_regex_handler.set_patterns(DEFAULT_READ_FREE_RPL_TABLES); +#endif + rocksdb_stats = rocksdb::CreateDBStatistics(); + rocksdb_stats->set_stats_level( + static_cast(rocksdb_stats_level)); + rocksdb_stats_level = 
rocksdb_stats->get_stats_level(); rocksdb_db_options->statistics = rocksdb_stats; if (rocksdb_rate_limiter_bytes_per_sec != 0) { @@ -4937,13 +5338,15 @@ static int rocksdb_init_func(void *const p) { rocksdb_db_options->use_direct_reads) { // allow_mmap_reads implies !use_direct_reads and RocksDB will not open if // mmap_reads and direct_reads are both on. (NO_LINT_DEBUG) - sql_print_error("RocksDB: Can't enable both use_direct_reads " - "and allow_mmap_reads\n"); + sql_print_error( + "RocksDB: Can't enable both use_direct_reads " + "and allow_mmap_reads\n"); DBUG_RETURN(HA_EXIT_FAILURE); } // Check whether the filesystem backing rocksdb_datadir allows O_DIRECT - if (rocksdb_db_options->use_direct_reads) { + if (rocksdb_db_options->use_direct_reads || + rocksdb_db_options->use_direct_io_for_flush_and_compaction) { rocksdb::EnvOptions soptions; rocksdb::Status check_status; rocksdb::Env *const env = rocksdb_db_options->env; @@ -4964,9 +5367,11 @@ static int rocksdb_init_func(void *const p) { } if (!check_status.ok()) { - sql_print_error("RocksDB: Unable to use direct io in rocksdb-datadir:" - "(%s)", check_status.getState()); - rdb_open_tables.free_hash(); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Unable to use direct io in rocksdb-datadir:" + "(%s)", + check_status.getState()); DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -4974,17 +5379,19 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_db_options->allow_mmap_writes && rocksdb_db_options->use_direct_io_for_flush_and_compaction) { // See above comment for allow_mmap_reads. 
(NO_LINT_DEBUG) - sql_print_error("RocksDB: Can't enable both " - "use_direct_io_for_flush_and_compaction and " - "allow_mmap_writes\n"); + sql_print_error( + "RocksDB: Can't enable both " + "use_direct_io_for_flush_and_compaction and " + "allow_mmap_writes\n"); DBUG_RETURN(HA_EXIT_FAILURE); } if (rocksdb_db_options->allow_mmap_writes && rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { // NO_LINT_DEBUG - sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " - "to use allow_mmap_writes"); + sql_print_error( + "RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " + "to use allow_mmap_writes"); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5011,15 +5418,19 @@ static int rocksdb_init_func(void *const p) { #endif ) { sql_print_information("RocksDB: Got ENOENT when listing column families"); + + // NO_LINT_DEBUG sql_print_information( "RocksDB: assuming that we're creating a new database"); } else { rdb_log_status_error(status, "Error listing column families"); DBUG_RETURN(HA_EXIT_FAILURE); } - } else + } else { + // NO_LINT_DEBUG sql_print_information("RocksDB: %ld column families found", cf_names.size()); + } std::vector cf_descr; std::vector cf_handles; @@ -5028,9 +5439,33 @@ static int rocksdb_init_func(void *const p) { (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type; if (!rocksdb_tbl_options->no_block_cache) { - std::shared_ptr block_cache = rocksdb_use_clock_cache - ? rocksdb::NewClockCache(rocksdb_block_cache_size) - : rocksdb::NewLRUCache(rocksdb_block_cache_size); + std::shared_ptr memory_allocator; + if (!rocksdb_cache_dump) { + size_t block_size = rocksdb_tbl_options->block_size; + rocksdb::JemallocAllocatorOptions alloc_opt; + // Limit jemalloc tcache memory usage. The range + // [block_size/4, block_size] should be enough to cover most of + // block cache allocation sizes. 
+ alloc_opt.limit_tcache_size = true; + alloc_opt.tcache_size_lower_bound = block_size / 4; + alloc_opt.tcache_size_upper_bound = block_size; + rocksdb::Status new_alloc_status = + rocksdb::NewJemallocNodumpAllocator(alloc_opt, &memory_allocator); + if (!new_alloc_status.ok()) { + // Fallback to use default malloc/free. + rdb_log_status_error(new_alloc_status, + "Error excluding block cache from core dump"); + memory_allocator = nullptr; + DBUG_RETURN(HA_EXIT_FAILURE); + } + } + std::shared_ptr block_cache = + rocksdb_use_clock_cache + ? rocksdb::NewClockCache(rocksdb_block_cache_size) + : rocksdb::NewLRUCache( + rocksdb_block_cache_size, -1 /*num_shard_bits*/, + false /*strict_capcity_limit*/, + rocksdb_cache_high_pri_pool_ratio, memory_allocator); if (rocksdb_sim_cache_size > 0) { // Simulated cache enabled // Wrap block cache inside a simulated cache and pass it to RocksDB @@ -5065,7 +5500,7 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_persistent_cache_size_mb > 0) { std::shared_ptr pcache; - uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024; + uint64_t cache_size_bytes = rocksdb_persistent_cache_size_mb * 1024 * 1024; status = rocksdb::NewPersistentCache( rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path), cache_size_bytes, myrocks_logger, true, &pcache); @@ -5077,6 +5512,7 @@ static int rocksdb_init_func(void *const p) { } rocksdb_tbl_options->persistent_cache = pcache; } else if (strlen(rocksdb_persistent_cache_path)) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb"); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5094,17 +5530,23 @@ static int rocksdb_init_func(void *const p) { If there are no column families, we're creating the new database. Create one column family named "default". 
*/ - if (cf_names.size() == 0) - cf_names.push_back(DEFAULT_CF_NAME); + if (cf_names.size() == 0) cf_names.push_back(DEFAULT_CF_NAME); std::vector compaction_enabled_cf_indices; + + // NO_LINT_DEBUG sql_print_information("RocksDB: Column Families at start:"); for (size_t i = 0; i < cf_names.size(); ++i) { rocksdb::ColumnFamilyOptions opts; cf_options_map->get_cf_options(cf_names[i], &opts); + // NO_LINT_DEBUG sql_print_information(" cf=%s", cf_names[i].c_str()); + + // NO_LINT_DEBUG sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); + + // NO_LINT_DEBUG sql_print_information(" target_file_size_base=%" PRIu64, opts.target_file_size_base); @@ -5185,25 +5627,27 @@ static int rocksdb_init_func(void *const p) { DBUG_RETURN(HA_EXIT_FAILURE); } - auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME -#ifdef HAVE_PSI_INTERFACE - , - rdb_background_psi_thread_key +#ifndef HAVE_PSI_INTERFACE + auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME); +#else + auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME, + rdb_background_psi_thread_key); #endif - ); if (err != 0) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)", err); DBUG_RETURN(HA_EXIT_FAILURE); } - err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME -#ifdef HAVE_PSI_INTERFACE - , - rdb_drop_idx_psi_thread_key +#ifndef HAVE_PSI_INTERFACE + err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME); +#else + err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME, + rdb_drop_idx_psi_thread_key); #endif - ); if (err != 0) { + // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)", err); DBUG_RETURN(HA_EXIT_FAILURE); @@ -5220,7 +5664,6 @@ static int rocksdb_init_func(void *const p) { sql_print_error( "RocksDB: Couldn't start the manual compaction thread: (errno=%d)", err); - rdb_open_tables.free_hash(); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5254,7 +5697,6 @@ static int rocksdb_init_func(void *const p) { if 
(err != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Couldn't initialize error messages"); - rdb_open_tables.m_hash.~Rdb_table_set(); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -5277,13 +5719,17 @@ static int rocksdb_init_func(void *const p) { } #if !defined(_WIN32) && !defined(__APPLE__) - io_watchdog = new Rdb_io_watchdog(directories); + io_watchdog = new Rdb_io_watchdog(std::move(directories)); io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs); #endif // NO_LINT_DEBUG - sql_print_information("MyRocks storage engine plugin has been successfully " - "initialized."); + sql_print_information( + "MyRocks storage engine plugin has been successfully " + "initialized."); + + // Skip cleaning up rdb_open_tables as we've succeeded + rdb_open_tables_cleanup.skip(); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -5340,18 +5786,18 @@ static int rocksdb_done_func(void *const p) { "RocksDB: Couldn't stop the manual compaction thread: (errno=%d)", err); } - if (rdb_open_tables.m_hash.size()) { + if (rdb_open_tables.count()) { // Looks like we are getting unloaded and yet we have some open tables // left behind. error = 1; } + rdb_open_tables.free(); /* destructors for static objects can be called at _exit(), but we want to free the memory at dlclose() */ - rdb_open_tables.m_hash.~Rdb_table_set(); - mysql_mutex_destroy(&rdb_open_tables.m_mutex); + // MARIADB_MERGE_2019: rdb_open_tables.m_hash.~Rdb_table_set(); mysql_mutex_destroy(&rdb_sysvars_mutex); mysql_mutex_destroy(&rdb_block_cache_resize_mutex); @@ -5439,7 +5885,7 @@ static inline void rocksdb_smart_next(bool seek_backward, } } -#ifndef NDEBUG +#ifndef DBUG_OFF // simulate that RocksDB has reported corrupted data static void dbug_change_status_to_corrupted(rocksdb::Status *status) { *status = rocksdb::Status::Corruption(); @@ -5474,39 +5920,39 @@ static inline bool is_valid(rocksdb::Iterator *scan_it) { they are needed to function. 
*/ -Rdb_table_handler * -Rdb_open_tables_map::get_table_handler(const char *const table_name) { - Rdb_table_handler *table_handler; - uint length; - char *tmp_name; - +Rdb_table_handler *Rdb_open_tables_map::get_table_handler( + const char *const table_name) { DBUG_ASSERT(table_name != nullptr); - length = (uint)strlen(table_name); + + Rdb_table_handler *table_handler; + + std::string table_name_str(table_name); // First, look up the table in the hash map. RDB_MUTEX_LOCK_CHECK(m_mutex); - if (!m_hash.size() || !(table_handler = m_hash.find(table_name, length))) { + const auto it = m_table_map.find(table_name_str); + if (it != m_table_map.end()) { + // Found it + table_handler = it->second; + } else { + char *tmp_name; + // Since we did not find it in the hash map, attempt to create and add it // to the hash map. if (!(table_handler = reinterpret_cast(my_multi_malloc( MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler), - &tmp_name, length + 1, NullS)))) { + &tmp_name, table_name_str.length() + 1, NullS)))) { // Allocating a new Rdb_table_handler and a new table name failed. RDB_MUTEX_UNLOCK_CHECK(m_mutex); return nullptr; } table_handler->m_ref_count = 0; - table_handler->m_table_name_length = length; + table_handler->m_table_name_length = table_name_str.length(); table_handler->m_table_name = tmp_name; strmov(table_handler->m_table_name, table_name); - if (m_hash.insert(table_handler)) { - // Inserting into the hash map failed. 
- RDB_MUTEX_UNLOCK_CHECK(m_mutex); - my_free(table_handler); - return nullptr; - } + m_table_map.emplace(table_name_str, table_handler); thr_lock_init(&table_handler->m_thr_lock); #ifdef MARIAROCKS_NOT_YET @@ -5527,16 +5973,15 @@ std::vector rdb_get_open_table_names(void) { } std::vector Rdb_open_tables_map::get_table_names(void) const { - size_t i; const Rdb_table_handler *table_handler; std::vector names; RDB_MUTEX_LOCK_CHECK(m_mutex); - for (i = 0; (table_handler = m_hash.at(i)); i++) { + for (const auto &kv : m_table_map) { + table_handler = kv.second; DBUG_ASSERT(table_handler != nullptr); names.push_back(table_handler->m_table_name); } - DBUG_ASSERT(i == m_hash.size()); RDB_MUTEX_UNLOCK_CHECK(m_mutex); return names; @@ -5549,44 +5994,44 @@ std::vector Rdb_open_tables_map::get_table_names(void) const { static ulonglong rdb_get_int_col_max_value(const Field *field) { ulonglong max_value = 0; switch (field->key_type()) { - case HA_KEYTYPE_BINARY: - max_value = 0xFFULL; - break; - case HA_KEYTYPE_INT8: - max_value = 0x7FULL; - break; - case HA_KEYTYPE_USHORT_INT: - max_value = 0xFFFFULL; - break; - case HA_KEYTYPE_SHORT_INT: - max_value = 0x7FFFULL; - break; - case HA_KEYTYPE_UINT24: - max_value = 0xFFFFFFULL; - break; - case HA_KEYTYPE_INT24: - max_value = 0x7FFFFFULL; - break; - case HA_KEYTYPE_ULONG_INT: - max_value = 0xFFFFFFFFULL; - break; - case HA_KEYTYPE_LONG_INT: - max_value = 0x7FFFFFFFULL; - break; - case HA_KEYTYPE_ULONGLONG: - max_value = 0xFFFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_LONGLONG: - max_value = 0x7FFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_FLOAT: - max_value = 0x1000000ULL; - break; - case HA_KEYTYPE_DOUBLE: - max_value = 0x20000000000000ULL; - break; - default: - abort(); + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + max_value = 0x7FFFULL; + break; + case 
HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_FLOAT: + max_value = 0x1000000ULL; + break; + case HA_KEYTYPE_DOUBLE: + max_value = 0x20000000000000ULL; + break; + default: + abort(); } return max_value; @@ -5595,7 +6040,7 @@ static ulonglong rdb_get_int_col_max_value(const Field *field) { void ha_rocksdb::load_auto_incr_value() { ulonglong auto_incr = 0; bool validate_last = false, use_datadic = true; -#ifndef NDEBUG +#ifndef DBUG_OFF DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;); validate_last = true; #endif @@ -5642,7 +6087,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() { // appropriately and non-index-only lookup will not read the value) const bool save_keyread_only = m_keyread_only; m_keyread_only = true; - m_key_requested = true; + m_converter->set_is_key_requested(true); if (!index_last(table->record[0])) { Field *field = @@ -5654,7 +6099,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() { if (last_val != max_val) { last_val++; } -#ifndef NDEBUG +#ifndef DBUG_OFF ulonglong dd_val; if (last_val <= max_val) { const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id(); @@ -5779,8 +6224,9 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) { // Get hidden primary key from old key slice Rdb_string_reader reader(&rowkey_slice); - if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) + if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) { return HA_ERR_ROCKSDB_CORRUPT_DATA; + } const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */ const uchar *from = reinterpret_cast(reader.read(length)); @@ -5808,8 +6254,9 @@ 
void Rdb_open_tables_map::release_table_handler( DBUG_ASSERT(table_handler->m_ref_count > 0); if (!--table_handler->m_ref_count) { // Last reference was released. Tear down the hash entry. - const auto ret MY_ATTRIBUTE((__unused__)) = m_hash.remove(table_handler); - DBUG_ASSERT(!ret); // the hash entry must actually be found and deleted + const auto ret MY_ATTRIBUTE((__unused__)) = + m_table_map.erase(std::string(table_handler->m_table_name)); + DBUG_ASSERT(ret == 1); // the hash entry must actually be found and deleted my_core::thr_lock_delete(&table_handler->m_thr_lock); my_free(table_handler); } @@ -5825,19 +6272,34 @@ static handler *rocksdb_create_handler(my_core::handlerton *const hton, ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, my_core::TABLE_SHARE *const table_arg) - : handler(hton, table_arg), m_table_handler(nullptr), m_scan_it(nullptr), - m_scan_it_skips_bloom(false), m_scan_it_snapshot(nullptr), - m_scan_it_lower_bound(nullptr), m_scan_it_upper_bound(nullptr), - m_tbl_def(nullptr), m_pk_descr(nullptr), m_key_descr_arr(nullptr), - m_pk_can_be_decoded(false), m_maybe_unpack_info(false), - m_pk_tuple(nullptr), m_pk_packed_tuple(nullptr), - m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr), - m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), - m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr), - m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr), - m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr), - m_row_checksums_checked(0), m_in_rpl_delete_rows(false), - m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {} + : handler(hton, table_arg), + m_table_handler(nullptr), + m_scan_it(nullptr), + m_scan_it_skips_bloom(false), + m_scan_it_snapshot(nullptr), + m_scan_it_lower_bound(nullptr), + m_scan_it_upper_bound(nullptr), + m_tbl_def(nullptr), + m_pk_descr(nullptr), + m_key_descr_arr(nullptr), + m_pk_can_be_decoded(false), + m_pk_tuple(nullptr), + 
m_pk_packed_tuple(nullptr), + m_sk_packed_tuple(nullptr), + m_end_key_packed_tuple(nullptr), + m_sk_match_prefix(nullptr), + m_sk_match_prefix_buf(nullptr), + m_sk_packed_tuple_old(nullptr), + m_dup_sk_packed_tuple(nullptr), + m_dup_sk_packed_tuple_old(nullptr), + m_pack_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), + m_keyread_only(false), + m_insert_with_update(false), + m_dup_pk_found(false), + m_in_rpl_delete_rows(false), + m_in_rpl_update_rows(false), + m_force_skip_unique_check(false) {} const std::string &ha_rocksdb::get_table_basename() const { @@ -5856,9 +6318,9 @@ bool ha_rocksdb::init_with_fields() { if (pk != MAX_KEY) { const uint key_parts = table_share->key_info[pk].user_defined_key_parts; check_keyread_allowed(pk /*PK*/, key_parts - 1, true); - } else + } else { m_pk_can_be_decoded = false; - + } cached_table_flags = table_flags(); DBUG_RETURN(false); /* Ok */ @@ -5915,298 +6377,52 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, RDB_MAX_HEXDUMP_LEN); const GL_INDEX_ID gl_index_id = kd.get_gl_index_id(); // NO_LINT_DEBUG - sql_print_error("Decoding ttl from PK value failed, " - "for index (%u,%u), val: %s", - gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); + sql_print_error( + "Decoding ttl from PK value failed, " + "for index (%u,%u), val: %s", + gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); DBUG_ASSERT(0); return false; } /* Hide record if it has expired before the current snapshot time. 
*/ uint64 read_filter_ts = 0; -#ifndef NDEBUG +#ifndef DBUG_OFF read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); #endif bool is_hide_ttl = ts + kd.m_ttl_duration + read_filter_ts <= static_cast(curr_ts); if (is_hide_ttl) { update_row_stats(ROWS_FILTERED); + + /* increment examined row count when rows are skipped */ + THD *thd = ha_thd(); + thd->inc_examined_row_count(1); + DEBUG_SYNC(thd, "rocksdb.ttl_rows_examined"); } return is_hide_ttl; } -void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, - rocksdb::Iterator *const iter, - bool seek_backward) { +int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward) { if (kd.has_ttl()) { + THD *thd = ha_thd(); while (iter->Valid() && should_hide_ttl_rec( kd, iter->value(), get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { + DEBUG_SYNC(thd, "rocksdb.check_flags_ser"); + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } rocksdb_smart_next(seek_backward, iter); } } -} - -/** - Convert record from table->record[0] form into a form that can be written - into rocksdb. - - @param pk_packed_slice Packed PK tuple. We need it in order to compute - and store its CRC. - @param packed_rec OUT Data slice with record data. -*/ - -int ha_rocksdb::convert_record_to_storage_format( - const struct update_row_info &row_info, rocksdb::Slice *const packed_rec) { - DBUG_ASSERT_IMP(m_maybe_unpack_info, row_info.new_pk_unpack_info); - DBUG_ASSERT(m_pk_descr != nullptr); - - const rocksdb::Slice &pk_packed_slice = row_info.new_pk_slice; - Rdb_string_writer *const pk_unpack_info = row_info.new_pk_unpack_info; - bool has_ttl = m_pk_descr->has_ttl(); - bool has_ttl_column = !m_pk_descr->m_ttl_column.empty(); - bool ttl_in_pk = has_ttl_column && (row_info.ttl_pk_offset != UINT_MAX); - - m_storage_record.length(0); - - if (has_ttl) { - /* If it's a TTL record, reserve space for 8 byte TTL value in front. 
*/ - m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0); - m_ttl_bytes_updated = false; - - /* - If the TTL is contained within the key, we use the offset to find the - TTL value and place it in the beginning of the value record. - */ - if (ttl_in_pk) { - Rdb_string_reader reader(&pk_packed_slice); - const char *ts; - if (!reader.read(row_info.ttl_pk_offset) || - !(ts = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) { - std::string buf; - buf = rdb_hexdump(pk_packed_slice.data(), pk_packed_slice.size(), - RDB_MAX_HEXDUMP_LEN); - const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id(); - // NO_LINT_DEBUG - sql_print_error("Decoding ttl from PK failed during insert, " - "for index (%u,%u), key: %s", - gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); - return HA_EXIT_FAILURE; - } - - char *const data = const_cast(m_storage_record.ptr()); - memcpy(data, ts, ROCKSDB_SIZEOF_TTL_RECORD); -#ifndef NDEBUG - // Adjust for test case if needed - rdb_netbuf_store_uint64( - reinterpret_cast(data), - rdb_netbuf_to_uint64(reinterpret_cast(data)) + - rdb_dbug_set_ttl_rec_ts()); -#endif - // Also store in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } else if (!has_ttl_column) { - /* - For implicitly generated TTL records we need to copy over the old - TTL value from the old record in the event of an update. It was stored - in m_ttl_bytes. - - Otherwise, generate a timestamp using the current time. 
- */ - if (!row_info.old_pk_slice.empty()) { - char *const data = const_cast(m_storage_record.ptr()); - memcpy(data, m_ttl_bytes, sizeof(uint64)); - } else { - uint64 ts = static_cast(std::time(nullptr)); -#ifndef NDEBUG - ts += rdb_dbug_set_ttl_rec_ts(); -#endif - char *const data = const_cast(m_storage_record.ptr()); - rdb_netbuf_store_uint64(reinterpret_cast(data), ts); - // Also store in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } - } - } else { - /* All NULL bits are initially 0 */ - m_storage_record.fill(m_null_bytes_in_rec, 0); - } - - // If a primary key may have non-empty unpack_info for certain values, - // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block - // itself was prepared in Rdb_key_def::pack_record. - if (m_maybe_unpack_info) { - m_storage_record.append(reinterpret_cast(pk_unpack_info->ptr()), - pk_unpack_info->get_current_pos()); - } - - for (uint i = 0; i < table->s->fields; i++) { - /* Don't pack decodable PK key parts */ - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { - continue; - } - - Field *const field = table->field[i]; - if (m_encoder_arr[i].maybe_null()) { - char *data = const_cast(m_storage_record.ptr()); - if (has_ttl) { - data += ROCKSDB_SIZEOF_TTL_RECORD; - } - - if (field->is_null()) { - data[m_encoder_arr[i].m_null_offset] |= m_encoder_arr[i].m_null_mask; - /* Don't write anything for NULL values */ - continue; - } - } - - if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_BLOB) { - my_core::Field_blob *blob = (my_core::Field_blob *)field; - /* Get the number of bytes needed to store length*/ - const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; - - /* Store the length of the value */ - m_storage_record.append(reinterpret_cast(blob->ptr), - length_bytes); - - /* Store the blob value itself */ - char *data_ptr; - memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **)); - m_storage_record.append(data_ptr, 
blob->get_length()); - } else if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_VARCHAR) { - Field_varstring *const field_var = (Field_varstring *)field; - uint data_len; - /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) { - data_len = field_var->ptr[0]; - } else { - DBUG_ASSERT(field_var->length_bytes == 2); - data_len = uint2korr(field_var->ptr); - } - m_storage_record.append(reinterpret_cast(field_var->ptr), - field_var->length_bytes + data_len); - } else { - /* Copy the field data */ - const uint len = field->pack_length_in_rec(); - m_storage_record.append(reinterpret_cast(field->ptr), len); - - /* - Check if this is the TTL field within the table, if so store the TTL - in the front of the record as well here. - */ - if (has_ttl && has_ttl_column && - i == m_pk_descr->get_ttl_field_offset()) { - DBUG_ASSERT(len == ROCKSDB_SIZEOF_TTL_RECORD); - DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG); - DBUG_ASSERT(m_pk_descr->get_ttl_field_offset() != UINT_MAX); - - char *const data = const_cast(m_storage_record.ptr()); - uint64 ts = uint8korr(field->ptr); -#ifndef NDEBUG - ts += rdb_dbug_set_ttl_rec_ts(); -#endif - rdb_netbuf_store_uint64(reinterpret_cast(data), ts); - - // If this is an update and the timestamp has been updated, take note - // so we can avoid updating SKs unnecessarily. 
- if (!row_info.old_pk_slice.empty()) { - m_ttl_bytes_updated = - memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } - // Store timestamp in m_ttl_bytes to propagate to update_sk - memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); - } - } - } - - if (should_store_row_debug_checksums()) { - const uint32_t key_crc32 = my_core::crc32( - 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); - const uint32_t val_crc32 = - my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), - m_storage_record.length()); - uchar key_crc_buf[RDB_CHECKSUM_SIZE]; - uchar val_crc_buf[RDB_CHECKSUM_SIZE]; - rdb_netbuf_store_uint32(key_crc_buf, key_crc32); - rdb_netbuf_store_uint32(val_crc_buf, val_crc32); - m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1); - m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE); - m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE); - } - - *packed_rec = - rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length()); - return HA_EXIT_SUCCESS; } -/* - @brief - Setup which fields will be unpacked when reading rows - - @detail - Three special cases when we still unpack all fields: - - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all fields to find whether there is a row checksum at the end. We could - skip the fields instead of decoding them, but currently we do decoding.) 
- - On index merge as bitmap is cleared during that operation - - @seealso - ha_rocksdb::setup_field_converters() - ha_rocksdb::convert_record_from_storage_format() -*/ -void ha_rocksdb::setup_read_decoders() { - m_decoders_vect.clear(); - m_key_requested = false; - - int last_useful = 0; - int skip_size = 0; - - for (uint i = 0; i < table->s->fields; i++) { - // bitmap is cleared on index merge, but it still needs to decode columns - const bool field_requested = - m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_clear_all(table->read_set) || - bitmap_is_set(table->read_set, table->field[i]->field_index); - - // We only need the decoder if the whole record is stored. - if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { - // the field potentially needs unpacking - if (field_requested) { - // the field is in the read set - m_key_requested = true; - } - continue; - } - - if (field_requested) { - // We will need to decode this field - m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); - last_useful = m_decoders_vect.size(); - skip_size = 0; - } else { - if (m_encoder_arr[i].uses_variable_len_encoding() || - m_encoder_arr[i].maybe_null()) { - // For variable-length field, we need to read the data and skip it - m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size}); - skip_size = 0; - } else { - // Fixed-width field can be skipped without looking at it. - // Add appropriate skip_size to the next field. - skip_size += m_encoder_arr[i].m_pack_length_in_rec; - } - } - } - - // It could be that the last few elements are varchars that just do - // skipping. Remove them. 
- m_decoders_vect.erase(m_decoders_vect.begin() + last_useful, - m_decoders_vect.end()); -} - -#ifndef NDEBUG +#ifndef DBUG_OFF void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) { std::string str(on_disk_rec->data(), on_disk_rec->size()); on_disk_rec->Reset(); @@ -6231,17 +6447,6 @@ void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) { on_disk_rec->PinSelf(rocksdb::Slice(res)); } -void dbug_modify_key_varchar8(String &on_disk_rec) { - std::string res; - // The key starts with index number - res.append(on_disk_rec.ptr(), Rdb_key_def::INDEX_NUMBER_SIZE); - - // Then, a mem-comparable form of a varchar(8) value. - res.append("ABCDE\0\0\0\xFC", 9); - on_disk_rec.length(0); - on_disk_rec.append(res.data(), res.size()); -} - void dbug_create_err_inplace_alter() { my_printf_error(ER_UNKNOWN_ERROR, "Intentional failure in inplace alter occurred.", MYF(0)); @@ -6250,7 +6455,6 @@ void dbug_create_err_inplace_alter() { int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, uchar *const buf) { - DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", dbug_append_garbage_at_end(&m_retrieved_record);); DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", @@ -6261,91 +6465,6 @@ int ha_rocksdb::convert_record_from_storage_format( return convert_record_from_storage_format(key, &m_retrieved_record, buf); } -int ha_rocksdb::convert_blob_from_storage_format( - my_core::Field_blob *const blob, - Rdb_string_reader *const reader, - bool decode) -{ - /* Get the number of bytes needed to store length*/ - const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr; - - const char *data_len_str; - if (!(data_len_str = reader->read(length_bytes))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - memcpy(blob->ptr, data_len_str, length_bytes); - - const uint32 data_len = blob->get_length( - reinterpret_cast(data_len_str), length_bytes); - const char *blob_ptr; - if (!(blob_ptr = reader->read(data_len))) { - return 
HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) { - // set 8-byte pointer to 0, like innodb does (relevant for 32-bit - // platforms) - memset(blob->ptr + length_bytes, 0, 8); - memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **)); - } - - return HA_EXIT_SUCCESS; -} - -int ha_rocksdb::convert_varchar_from_storage_format( - my_core::Field_varstring *const field_var, - Rdb_string_reader *const reader, - bool decode) -{ - const char *data_len_str; - if (!(data_len_str = reader->read(field_var->length_bytes))) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - - uint data_len; - /* field_var->length_bytes is 1 or 2 */ - if (field_var->length_bytes == 1) { - data_len = (uchar)data_len_str[0]; - } else { - DBUG_ASSERT(field_var->length_bytes == 2); - data_len = uint2korr(data_len_str); - } - - if (data_len > field_var->field_length) { - /* The data on disk is longer than table DDL allows? */ - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (!reader->read(data_len)) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) { - memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len); - } - - return HA_EXIT_SUCCESS; -} - -int ha_rocksdb::convert_field_from_storage_format( - my_core::Field *const field, - Rdb_string_reader *const reader, - bool decode, - uint len) -{ - const char *data_bytes; - if (len > 0) { - if ((data_bytes = reader->read(len)) == nullptr) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (decode) - memcpy(field->ptr, data_bytes, len); - } - - return HA_EXIT_SUCCESS; -} - /* @brief Unpack the record in this->m_retrieved_record and this->m_last_rowkey from @@ -6362,8 +6481,8 @@ int ha_rocksdb::convert_field_from_storage_format( m_retrieved_record). @seealso - ha_rocksdb::setup_read_decoders() Sets up data structures which tell which - columns to decode. + rdb_converter::setup_read_decoders() Sets up data structures which tell + which columns to decode. 
@return 0 OK @@ -6373,241 +6492,7 @@ int ha_rocksdb::convert_field_from_storage_format( int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, const rocksdb::Slice *const value, uchar *const buf) { - Rdb_string_reader reader(value); - - /* - Decode PK fields from the key - */ - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1", - dbug_modify_key_varchar8(m_last_rowkey);); - - const rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(), - m_last_rowkey.length()); - const char *unpack_info = nullptr; - uint16 unpack_info_len = 0; - rocksdb::Slice unpack_slice; - - /* If it's a TTL record, skip the 8 byte TTL value */ - const char *ttl_bytes; - if (m_pk_descr->has_ttl()) { - if ((ttl_bytes = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) { - memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD); - } else { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - } - - /* Other fields are decoded from the value */ - const char *null_bytes = nullptr; - if (m_null_bytes_in_rec && !(null_bytes = reader.read(m_null_bytes_in_rec))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - if (m_maybe_unpack_info) { - unpack_info = reader.get_current_ptr(); - if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) || - !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - unpack_info_len = - rdb_netbuf_to_uint16(reinterpret_cast(unpack_info + 1)); - unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len); - - reader.read(unpack_info_len - - Rdb_key_def::get_unpack_header_size(unpack_info[0])); - } - - int err = HA_EXIT_SUCCESS; - if (m_key_requested) { - err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, - unpack_info ? 
&unpack_slice : nullptr, - false /* verify_checksum */); - } - - if (err != HA_EXIT_SUCCESS) { - return err; - } - - for (auto it = m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) { - const Rdb_field_encoder *const field_dec = it->m_field_enc; - const bool decode = it->m_decode; - const bool isNull = - field_dec->maybe_null() && - ((null_bytes[field_dec->m_null_offset] & field_dec->m_null_mask) != 0); - - Field *const field = table->field[field_dec->m_field_index]; - - /* Skip the bytes we need to skip */ - if (it->m_skip && !reader.read(it->m_skip)) { - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - uint field_offset = field->ptr - table->record[0]; - uint null_offset = field->null_offset(); - bool maybe_null = field->real_maybe_null(); - field->move_field(buf + field_offset, - maybe_null ? buf + null_offset : nullptr, - field->null_bit); - // WARNING! - Don't return before restoring field->ptr and field->null_ptr! - - if (isNull) { - if (decode) { - /* This sets the NULL-bit of this record */ - field->set_null(); - /* - Besides that, set the field value to default value. CHECKSUM TABLE - depends on this. - */ - memcpy(field->ptr, table->s->default_values + field_offset, - field->pack_length()); - } - } else { - if (decode) { - field->set_notnull(); - } - - if (field_dec->m_field_type == MYSQL_TYPE_BLOB) { - err = convert_blob_from_storage_format( - (my_core::Field_blob *) field, &reader, decode); - } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) { - err = convert_varchar_from_storage_format( - (my_core::Field_varstring *) field, &reader, decode); - } else { - err = convert_field_from_storage_format( - field, &reader, decode, field_dec->m_pack_length_in_rec); - } - } - - // Restore field->ptr and field->null_ptr - field->move_field(table->record[0] + field_offset, - maybe_null ? 
table->record[0] + null_offset : nullptr, - field->null_bit); - - if (err != HA_EXIT_SUCCESS) { - return err; - } - } - - if (m_verify_row_debug_checksums) { - if (reader.remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE && - reader.read(1)[0] == RDB_CHECKSUM_DATA_TAG) { - uint32_t stored_key_chksum = - rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE)); - uint32_t stored_val_chksum = - rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE)); - - const uint32_t computed_key_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); - const uint32_t computed_val_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(value), - value->size() - RDB_CHECKSUM_CHUNK_SIZE); - - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", - stored_key_chksum++;); - - if (stored_key_chksum != computed_key_chksum) { - m_pk_descr->report_checksum_mismatch(true, key->data(), key->size()); - return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; - } - - DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", - stored_val_chksum++;); - if (stored_val_chksum != computed_val_chksum) { - m_pk_descr->report_checksum_mismatch(false, value->data(), - value->size()); - return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH; - } - - m_row_checksums_checked++; - } - if (reader.remaining_bytes()) - return HA_ERR_ROCKSDB_CORRUPT_DATA; - } - - return HA_EXIT_SUCCESS; -} - -void ha_rocksdb::get_storage_type(Rdb_field_encoder *const encoder, - const uint &kp) { - // STORE_SOME uses unpack_info. - if (m_pk_descr->has_unpack_info(kp)) { - DBUG_ASSERT(m_pk_descr->can_unpack(kp)); - encoder->m_storage_type = Rdb_field_encoder::STORE_SOME; - m_maybe_unpack_info = true; - } else if (m_pk_descr->can_unpack(kp)) { - encoder->m_storage_type = Rdb_field_encoder::STORE_NONE; - } -} - -/* - Setup data needed to convert table->record[] to and from record storage - format. 
- - @seealso - ha_rocksdb::convert_record_to_storage_format, - ha_rocksdb::convert_record_from_storage_format -*/ - -void ha_rocksdb::setup_field_converters() { - uint i; - uint null_bytes = 0; - uchar cur_null_mask = 0x1; - - DBUG_ASSERT(m_encoder_arr == nullptr); - m_encoder_arr = static_cast( - my_malloc(table->s->fields * sizeof(Rdb_field_encoder), MYF(0))); - if (m_encoder_arr == nullptr) { - return; - } - - for (i = 0; i < table->s->fields; i++) { - Field *const field = table->field[i]; - m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL; - - /* - Check if this field is - - a part of primary key, and - - it can be decoded back from its key image. - If both hold, we don't need to store this field in the value part of - RocksDB's key-value pair. - - If hidden pk exists, we skip this check since the field will never be - part of the hidden pk. - */ - if (!has_hidden_pk(table)) { - KEY *const pk_info = &table->key_info[table->s->primary_key]; - for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { - /* key_part->fieldnr is counted from 1 */ - if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) { - get_storage_type(&m_encoder_arr[i], kp); - break; - } - } - } - - m_encoder_arr[i].m_field_type = field->real_type(); - m_encoder_arr[i].m_field_index = i; - m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec(); - - if (field->real_maybe_null()) { - m_encoder_arr[i].m_null_mask = cur_null_mask; - m_encoder_arr[i].m_null_offset = null_bytes; - if (cur_null_mask == 0x80) { - cur_null_mask = 0x1; - null_bytes++; - } else - cur_null_mask = cur_null_mask << 1; - } else { - m_encoder_arr[i].m_null_mask = 0; - } - } - - /* Count the last, unfinished NULL-bits byte */ - if (cur_null_mask != 0x1) - null_bytes++; - - m_null_bytes_in_rec = null_bytes; + return m_converter->decode(m_pk_descr, buf, key, value); } int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, @@ -6644,8 +6529,8 @@ int ha_rocksdb::alloc_key_buffers(const 
TABLE *const table_arg, /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */ max_packed_sk_len = pack_key_len; for (uint i = 0; i < table_arg->s->keys; i++) { - if (i == table_arg->s->primary_key) /* Primary key was processed above */ - continue; + /* Primary key was processed above */ + if (i == table_arg->s->primary_key) continue; // TODO: move this into get_table_handler() ?? kd_arr[i]->setup(table_arg, tbl_def_arg); @@ -6734,25 +6619,10 @@ void ha_rocksdb::free_key_buffers() { } #ifdef MARIAROCKS_NOT_YET -void ha_rocksdb::set_use_read_free_rpl(const char *const whitelist) { - DBUG_ASSERT(whitelist != nullptr); - -#if defined(HAVE_PSI_INTERFACE) - Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables); -#else - Regex_list_handler regex_handler; #endif - - if (!regex_handler.set_patterns(whitelist)) { - warn_about_bad_patterns(&regex_handler, "read_free_rpl_tables"); - } - - m_use_read_free_rpl = regex_handler.matches(m_tbl_def->base_tablename()); -} -#endif - void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { - DBUG_ASSERT(whitelist != nullptr); + const char *const wl = + whitelist ?
whitelist : DEFAULT_SKIP_UNIQUE_CHECK_TABLES; #if defined(HAVE_PSI_INTERFACE) Regex_list_handler regex_handler(key_rwlock_skip_unique_check_tables); @@ -6760,7 +6630,7 @@ void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { Regex_list_handler regex_handler; #endif - if (!regex_handler.set_patterns(whitelist)) { + if (!regex_handler.set_patterns(wl)) { warn_about_bad_patterns(&regex_handler, "skip_unique_check_tables"); } @@ -6807,8 +6677,8 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { "dictionary"); DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE); } - m_lock_rows = RDB_LOCK_NONE; + m_lock_rows = RDB_LOCK_NONE; m_key_descr_arr = m_tbl_def->m_key_descr_arr; /* @@ -6837,7 +6707,15 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { */ init_with_fields(); - setup_field_converters(); + /* Initialize decoder */ + m_converter = std::make_shared(ha_thd(), m_tbl_def, table); + + /* + Update m_ttl_bytes address to same as Rdb_converter's m_ttl_bytes. + Remove this code after moving convert_record_to_storage_format() into + Rdb_converter class. + */ + m_ttl_bytes = m_converter->get_ttl_bytes_buffer(); /* MariaDB: adjust field->part_of_key for PK columns. We can only do it here @@ -6880,11 +6758,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { The following load_XXX code calls row decode functions, and they do that without having done ::external_lock() or index_init()/rnd_init(). (Note: this also means we're doing a read when there was no - setup_field_converters() call) + rdb_converter::setup_field_encoders() call) Initialize the necessary variables for them: */ - m_verify_row_debug_checksums = false; /* Load auto_increment value only once on first use.
*/ if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) { @@ -6902,10 +6779,7 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { stats.block_size = rocksdb_tbl_options->block_size; #ifdef MARIAROCKS_NOT_YET // MDEV-10976 - /* Determine at open whether we can use Read Free Replication or not */ - set_use_read_free_rpl(THDVAR(ha_thd(), read_free_rpl_tables)); #endif - /* Determine at open whether we should skip unique checks for this table */ set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables)); @@ -6917,19 +6791,15 @@ int ha_rocksdb::close(void) { m_pk_descr = nullptr; m_key_descr_arr = nullptr; - + m_converter = nullptr; free_key_buffers(); - my_free(m_encoder_arr); - m_encoder_arr = nullptr; - if (m_table_handler != nullptr) { rdb_open_tables.release_table_handler(m_table_handler); m_table_handler = nullptr; } // These are needed to suppress valgrind errors in rocksdb.partition - m_storage_record.free(); m_last_rowkey.free(); m_sk_tails.free(); m_sk_tails_old.free(); @@ -7020,61 +6890,72 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s, int err; switch (s.code()) { - case rocksdb::Status::Code::kOk: - err = HA_EXIT_SUCCESS; - break; - case rocksdb::Status::Code::kNotFound: - err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND; - break; - case rocksdb::Status::Code::kCorruption: - err = HA_ERR_ROCKSDB_STATUS_CORRUPTION; - break; - case rocksdb::Status::Code::kNotSupported: - err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED; - break; - case rocksdb::Status::Code::kInvalidArgument: - err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT; - break; - case rocksdb::Status::Code::kIOError: - err = (s.IsNoSpace()) ? 
HA_ERR_ROCKSDB_STATUS_NO_SPACE - : HA_ERR_ROCKSDB_STATUS_IO_ERROR; - break; - case rocksdb::Status::Code::kMergeInProgress: - err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS; - break; - case rocksdb::Status::Code::kIncomplete: - err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE; - break; - case rocksdb::Status::Code::kShutdownInProgress: - err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS; - break; - case rocksdb::Status::Code::kTimedOut: - err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT; - break; - case rocksdb::Status::Code::kAborted: - err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT - : HA_ERR_ROCKSDB_STATUS_ABORTED; - break; - case rocksdb::Status::Code::kBusy: - err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK - : HA_ERR_ROCKSDB_STATUS_BUSY; - break; - case rocksdb::Status::Code::kExpired: - err = HA_ERR_ROCKSDB_STATUS_EXPIRED; - break; - case rocksdb::Status::Code::kTryAgain: - err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN; - break; - default: - DBUG_ASSERT(0); - return -1; + case rocksdb::Status::Code::kOk: + err = HA_EXIT_SUCCESS; + break; + case rocksdb::Status::Code::kNotFound: + err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND; + break; + case rocksdb::Status::Code::kCorruption: + err = HA_ERR_ROCKSDB_STATUS_CORRUPTION; + break; + case rocksdb::Status::Code::kNotSupported: + err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED; + break; + case rocksdb::Status::Code::kInvalidArgument: + err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT; + break; + case rocksdb::Status::Code::kIOError: + err = (s.IsNoSpace()) ? 
HA_ERR_ROCKSDB_STATUS_NO_SPACE + : HA_ERR_ROCKSDB_STATUS_IO_ERROR; + break; + case rocksdb::Status::Code::kMergeInProgress: + err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS; + break; + case rocksdb::Status::Code::kIncomplete: + err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE; + break; + case rocksdb::Status::Code::kShutdownInProgress: + err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS; + break; + case rocksdb::Status::Code::kTimedOut: + err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT; + break; + case rocksdb::Status::Code::kAborted: + err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT + : HA_ERR_ROCKSDB_STATUS_ABORTED; + break; + case rocksdb::Status::Code::kBusy: + err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK + : HA_ERR_ROCKSDB_STATUS_BUSY; + break; + case rocksdb::Status::Code::kExpired: + err = HA_ERR_ROCKSDB_STATUS_EXPIRED; + break; + case rocksdb::Status::Code::kTryAgain: + err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN; + break; + default: + DBUG_ASSERT(0); + return -1; + } + + std::string errMsg; + if (s.IsLockLimit()) { + errMsg = + "Operation aborted: Failed to acquire lock due to " + "rocksdb_max_row_locks limit"; + } else { + errMsg = s.ToString(); } if (opt_msg) { - std::string concatenated_error = s.ToString() + " (" + std::string(opt_msg) + ")"; - my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), rocksdb_hton_name); + std::string concatenated_error = errMsg + " (" + std::string(opt_msg) + ")"; + my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), + rocksdb_hton_name); } else { - my_error(ER_GET_ERRMSG, MYF(0), s.code(), s.ToString().c_str(), rocksdb_hton_name); + my_error(ER_GET_ERRMSG, MYF(0), s.code(), errMsg.c_str(), + rocksdb_hton_name); } return err; @@ -7084,8 +6965,8 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s, static const std::set RDB_INDEX_COLLATIONS = { COLLATION_BINARY, COLLATION_UTF8_BIN, COLLATION_LATIN1_BIN}; -static bool -rdb_is_index_collation_supported(const my_core::Field *const field) 
{ +static bool rdb_is_index_collation_supported( + const my_core::Field *const field) { const my_core::enum_field_types type = field->real_type(); /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING || @@ -7145,8 +7026,6 @@ int ha_rocksdb::create_key_defs( DBUG_ASSERT(table_arg->s != nullptr); - uint i; - /* These need to be one greater than MAX_INDEXES since the user can create MAX_INDEXES secondary keys and no primary key which would cause us @@ -7163,6 +7042,36 @@ int ha_rocksdb::create_key_defs( DBUG_RETURN(HA_EXIT_FAILURE); } + uint64 ttl_duration = 0; + std::string ttl_column; + uint ttl_field_offset; + + uint err; + if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg, + &ttl_duration))) { + DBUG_RETURN(err); + } + + if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column, + &ttl_field_offset))) { + DBUG_RETURN(err); + } + + /* We don't currently support TTL on tables with hidden primary keys. */ + if (ttl_duration > 0 && has_hidden_pk(table_arg)) { + my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* + If TTL duration is not specified but TTL column was specified, throw an + error because TTL column requires duration. + */ + if (ttl_duration == 0 && !ttl_column.empty()) { + my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str()); + DBUG_RETURN(HA_EXIT_FAILURE); + } + if (!old_tbl_def_arg) { /* old_tbl_def doesn't exist. this means we are in the process of creating @@ -7171,9 +7080,9 @@ int ha_rocksdb::create_key_defs( Get the index numbers (this will update the next_index_number) and create Rdb_key_def structures. 
*/ - for (i = 0; i < tbl_def_arg->m_key_count; i++) { - if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], - cfs[i])) { + for (uint i = 0; i < tbl_def_arg->m_key_count; i++) { + if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], cfs[i], + ttl_duration, ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -7184,7 +7093,8 @@ int ha_rocksdb::create_key_defs( generate the necessary new key definitions if any. */ if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg, - old_tbl_def_arg, cfs)) { + old_tbl_def_arg, cfs, ttl_duration, + ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } } @@ -7270,8 +7180,8 @@ int ha_rocksdb::create_cfs( // Generate the name for the column family to use. bool per_part_match_found = false; - std::string cf_name = generate_cf_name(i, table_arg, tbl_def_arg, - &per_part_match_found); + std::string cf_name = + generate_cf_name(i, table_arg, tbl_def_arg, &per_part_match_found); // Prevent create from using the system column family. 
if (cf_name == DEFAULT_SYSTEM_CF_NAME) { @@ -7316,7 +7226,8 @@ int ha_rocksdb::create_cfs( int ha_rocksdb::create_inplace_key_defs( const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, - const std::array &cfs) const { + const std::array &cfs, + uint64 ttl_duration, const std::string &ttl_column) const { DBUG_ENTER_FUNC(); std::shared_ptr *const old_key_descr = @@ -7342,10 +7253,11 @@ int ha_rocksdb::create_inplace_key_defs( struct Rdb_index_info index_info; if (!dict_manager.get_index_info(gl_index_id, &index_info)) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Could not get index information " - "for Index Number (%u,%u), table %s", - gl_index_id.cf_id, gl_index_id.index_id, - old_tbl_def_arg->full_tablename().c_str()); + sql_print_error( + "RocksDB: Could not get index information " + "for Index Number (%u,%u), table %s", + gl_index_id.cf_id, gl_index_id.index_id, + old_tbl_def_arg->full_tablename().c_str()); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -7369,7 +7281,7 @@ int ha_rocksdb::create_inplace_key_defs( dict_manager.get_stats(gl_index_id), index_info.m_index_flags, ttl_rec_offset, index_info.m_ttl_duration); } else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i], - cfs[i])) { + cfs[i], ttl_duration, ttl_column)) { DBUG_RETURN(HA_EXIT_FAILURE); } @@ -7519,44 +7431,16 @@ int ha_rocksdb::compare_key_parts(const KEY *const old_key, 0 - Ok other - error, either given table ddl is not supported by rocksdb or OOM. 
*/ -int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, +int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint i, const Rdb_tbl_def *const tbl_def_arg, std::shared_ptr *const new_key_def, - const struct key_def_cf_info &cf_info) const { + const struct key_def_cf_info &cf_info, + uint64 ttl_duration, + const std::string &ttl_column) const { DBUG_ENTER_FUNC(); DBUG_ASSERT(*new_key_def == nullptr); - uint64 ttl_duration = 0; - std::string ttl_column; - uint ttl_field_offset; - - uint err; - if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg, - &ttl_duration))) { - DBUG_RETURN(err); - } - - if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column, - &ttl_field_offset))) { - DBUG_RETURN(err); - } - - /* We don't currently support TTL on tables with hidden primary keys. */ - if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) { - my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); - DBUG_RETURN(HA_EXIT_FAILURE); - } - - /* - If TTL duration is not specified but TTL column was specified, throw an - error because TTL column requires duration. - */ - if (ttl_duration == 0 && !ttl_column.empty()) { - my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str()); - DBUG_RETURN(HA_EXIT_FAILURE); - } - const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager); const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST; uchar index_type; @@ -7605,7 +7489,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, if (!ttl_column.empty()) { (*new_key_def)->m_ttl_column = ttl_column; } - + // initialize key_def + (*new_key_def)->setup(table_arg, tbl_def_arg); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -7613,7 +7498,7 @@ int rdb_normalize_tablename(const std::string &tablename, std::string *const strbuf) { if (tablename.size() < 2 || tablename[0] != '.' 
|| (tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) { - DBUG_ASSERT(0); // We were not passed table name? + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; } @@ -7623,7 +7508,7 @@ int rdb_normalize_tablename(const std::string &tablename, } if (pos == std::string::npos) { - DBUG_ASSERT(0); // We were not passed table name? + DBUG_ASSERT(0); // We were not passed table name? return HA_ERR_ROCKSDB_INVALID_TABLE; } @@ -7737,95 +7622,34 @@ int rdb_split_normalized_tablename(const std::string &fullname, return HA_EXIT_SUCCESS; } -/** - @brief - create() is called to create a database. The variable name will have the name - of the table. +/* + Create a table's Rdb_tbl_def and its Rdb_key_defs and store table information + into MyRocks Data Dictionary + The method is called during create table/partition, truncate table/partition - @details - When create() is called you do not need to worry about - opening the table. Also, the .frm file will have already been - created so adjusting create_info is not necessary. You can overwrite - the .frm file at this point if you wish to change the table - definition, but there are no methods currently provided for doing - so. - - Called from handle.cc by ha_create_table(). 
+ @param table_name IN table's name formated as + 'dbname.tablename' + @param table_arg IN sql table + @param auto_increment_value IN specified table's auto increment value @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) - - @see - ha_create_table() in handle.cc */ - -int ha_rocksdb::create(const char *const name, TABLE *const table_arg, - HA_CREATE_INFO *const create_info) { +int ha_rocksdb::create_table(const std::string &table_name, + const TABLE *table_arg, + ulonglong auto_increment_value) { DBUG_ENTER_FUNC(); - DBUG_ASSERT(table_arg != nullptr); - DBUG_ASSERT(create_info != nullptr); - if (create_info->data_file_name) { - // DATA DIRECTORY is used to create tables under a specific location - // outside the MySQL data directory. We don't support this for MyRocks. - // The `rocksdb_datadir` setting should be used to configure RocksDB data - // directory. - print_error(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED, - MYF(ME_JUST_WARNING)); - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - if (create_info->index_file_name) { - // Similar check for INDEX DIRECTORY as well. - print_error(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED, - MYF(ME_JUST_WARNING)); - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - - std::string str; int err; - THD *const thd = my_core::thd_get_current_thd(); - - if (get_table_if_exists(name)) { - if (thd->lex->sql_command == SQLCOM_TRUNCATE) { - err = delete_table(name); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - } else { - err = rdb_normalize_tablename(name, &str); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name); - DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); - } - } - - /* - Construct dbname.tablename ourselves, because parititioning - passes strings like "./test/t14#P#p0" for individual partitions, - while table_arg->s->table_name has none of that. 
- */ - err = rdb_normalize_tablename(name, &str); - if (err != HA_EXIT_SUCCESS) { - DBUG_RETURN(err); - } - - if (contains_foreign_key(thd)) { - my_error(ER_NOT_SUPPORTED_YET, MYF(0), - "FOREIGN KEY for the RocksDB storage engine"); - DBUG_RETURN(HA_ERR_UNSUPPORTED); - } - const std::unique_ptr wb = dict_manager.begin(); rocksdb::WriteBatch *const batch = wb.get(); /* Create table/key descriptions and put them into the data dictionary */ - m_tbl_def = new Rdb_tbl_def(str); + m_tbl_def = new Rdb_tbl_def(table_name); uint n_keys = table_arg->s->keys; @@ -7835,6 +7659,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, */ if (has_hidden_pk(table_arg)) { n_keys += 1; + // reset hidden pk id + // the starting valid value for hidden pk is 1 + m_tbl_def->m_hidden_pk_val = 1; } m_key_descr_arr = new std::shared_ptr[n_keys]; @@ -7848,9 +7675,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; - if (create_info->auto_increment_value) { + if (auto_increment_value) { bool autoinc_upgrade_test = false; - m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; + m_tbl_def->m_auto_incr_val = auto_increment_value; DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;); if (!autoinc_upgrade_test) { auto s = dict_manager.put_auto_incr_val( @@ -7888,6 +7715,86 @@ error: DBUG_RETURN(err); } +/** + @brief + create() is called to create a table. The variable name will have the name + of the table. + + @details + When create() is called you do not need to worry about + opening the table. Also, the .frm file will have already been + created so adjusting create_info is not necessary. You can overwrite + the .frm file at this point if you wish to change the table + definition, but there are no methods currently provided for doing + so. + + Called from handle.cc by ha_create_table(). 
+ + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) + + @see + ha_create_table() in handle.cc +*/ + +int ha_rocksdb::create(const char *const name, TABLE *const table_arg, + HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table_arg != nullptr); + DBUG_ASSERT(create_info != nullptr); + + if (create_info->data_file_name) { + // DATA DIRECTORY is used to create tables under a specific location + // outside the MySQL data directory. We don't support this for MyRocks. + // The `rocksdb_datadir` setting should be used to configure RocksDB data + // directory. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED); + } + + if (create_info->index_file_name) { + // Similar check for INDEX DIRECTORY as well. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED); + } + + int err; + /* + Construct dbname.tablename ourselves, because parititioning + passes strings like "./test/t14#P#p0" for individual partitions, + while table_arg->s->table_name has none of that. 
+ */ + std::string str; + err = rdb_normalize_tablename(name, &str); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + // FOREIGN KEY isn't supported yet + THD *const thd = my_core::thd_get_current_thd(); + if (contains_foreign_key(thd)) { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "FOREIGN KEY for the RocksDB storage engine"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + + // Check whether Data Dictionary contain information + Rdb_tbl_def *tbl = ddl_manager.find(str); + if (tbl != nullptr) { + if (thd->lex->sql_command == SQLCOM_TRUNCATE) { + err = delete_table(tbl); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + } else { + my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name); + DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); + } + } + + DBUG_RETURN(create_table(str, table_arg, create_info->auto_increment_value)); +} + /** @note This function is used only when the table has not yet been opened, and @@ -7931,9 +7838,10 @@ bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, - const bool &full_key_match, + const bool /* unused */, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for the first record such that index_tuple= lookup_tuple. @@ -7942,6 +7850,9 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice); while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } /* If TTL is enabled we need to check if the given key has already expired from the POV of the current transaction. 
If it has, try going to the next @@ -7963,9 +7874,10 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, } int ha_rocksdb::read_before_key(const Rdb_key_def &kd, - const bool &full_key_match, + const bool full_key_match, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for record with the biggest t.key such that t.key < lookup_tuple. @@ -7973,6 +7885,9 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice); while (is_valid(m_scan_it)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } /* We are using full key and we've hit an exact match, or... @@ -7997,6 +7912,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, int ha_rocksdb::read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); /* We are looking for the first record such that @@ -8014,6 +7930,9 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, */ while (is_valid(m_scan_it) && kd.has_ttl() && should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); } @@ -8022,7 +7941,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, int ha_rocksdb::position_to_correct_key( const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const bool &full_key_match, const uchar *const key, + const bool full_key_match, const uchar *const key, const key_part_map &keypart_map, const rocksdb::Slice &key_slice, bool *const move_forward, const int64_t ttl_filter_ts) { int rc = 0; @@ -8030,65 +7949,66 @@ int ha_rocksdb::position_to_correct_key( *move_forward = true; switch (find_flag) { - case HA_READ_KEY_EXACT: - rc = - read_key_exact(kd, m_scan_it, full_key_match, key_slice, ttl_filter_ts); - break; - case HA_READ_BEFORE_KEY: - *move_forward = false; - rc = read_before_key(kd, 
full_key_match, key_slice, ttl_filter_ts); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) { - /* The record we've got is not from this index */ - rc = HA_ERR_KEY_NOT_FOUND; - } - break; - case HA_READ_AFTER_KEY: - case HA_READ_KEY_OR_NEXT: - rc = read_after_key(kd, key_slice, ttl_filter_ts); - if (rc == 0 && !kd.covers_key(m_scan_it->key())) { - /* The record we've got is not from this index */ - rc = HA_ERR_KEY_NOT_FOUND; - } - break; - case HA_READ_KEY_OR_PREV: - case HA_READ_PREFIX: - /* This flag is not used by the SQL layer, so we don't support it yet. */ - rc = HA_ERR_UNSUPPORTED; - break; - case HA_READ_PREFIX_LAST: - case HA_READ_PREFIX_LAST_OR_PREV: - *move_forward = false; - /* - Find the last record with the specified index prefix lookup. - - HA_READ_PREFIX_LAST requires that the record has the - prefix=lookup (if there are no such records, - HA_ERR_KEY_NOT_FOUND should be returned). - - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no - records with prefix=lookup, we should return the last record - before that. - */ - rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); - if (rc == 0) { - const rocksdb::Slice &rkey = m_scan_it->key(); - if (!kd.covers_key(rkey)) { + case HA_READ_KEY_EXACT: + rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice, + ttl_filter_ts); + break; + case HA_READ_BEFORE_KEY: + *move_forward = false; + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { /* The record we've got is not from this index */ rc = HA_ERR_KEY_NOT_FOUND; - } else if (find_flag == HA_READ_PREFIX_LAST) { - uint size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, - key, keypart_map); - rocksdb::Slice lookup_tuple(reinterpret_cast(m_sk_packed_tuple), - size); - - // We need to compare the key we've got with the original search prefix. 
- if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + } + break; + case HA_READ_AFTER_KEY: + case HA_READ_KEY_OR_NEXT: + rc = read_after_key(kd, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { + /* The record we've got is not from this index */ + rc = HA_ERR_KEY_NOT_FOUND; + } + break; + case HA_READ_KEY_OR_PREV: + case HA_READ_PREFIX: + /* This flag is not used by the SQL layer, so we don't support it yet. */ + rc = HA_ERR_UNSUPPORTED; + break; + case HA_READ_PREFIX_LAST: + case HA_READ_PREFIX_LAST_OR_PREV: + *move_forward = false; + /* + Find the last record with the specified index prefix lookup. + - HA_READ_PREFIX_LAST requires that the record has the + prefix=lookup (if there are no such records, + HA_ERR_KEY_NOT_FOUND should be returned). + - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no + records with prefix=lookup, we should return the last record + before that. + */ + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0) { + const rocksdb::Slice &rkey = m_scan_it->key(); + if (!kd.covers_key(rkey)) { + /* The record we've got is not from this index */ rc = HA_ERR_KEY_NOT_FOUND; + } else if (find_flag == HA_READ_PREFIX_LAST) { + uint size = kd.pack_index_tuple(table, m_pack_buffer, + m_sk_packed_tuple, key, keypart_map); + rocksdb::Slice lookup_tuple( + reinterpret_cast(m_sk_packed_tuple), size); + + // We need to compare the key we've got with the original search + // prefix. 
+ if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + rc = HA_ERR_KEY_NOT_FOUND; + } } } - } - break; - default: - DBUG_ASSERT(0); - break; + break; + default: + DBUG_ASSERT(0); + break; } return rc; @@ -8097,11 +8017,10 @@ int ha_rocksdb::position_to_correct_key( int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, const rocksdb::Slice &slice, - const int &bytes_changed_by_succ, + const int bytes_changed_by_succ, const key_range *const end_key, uint *const end_key_packed_size) { - if (find_flag == HA_READ_KEY_EXACT) - return slice.size(); + if (find_flag == HA_READ_KEY_EXACT) return slice.size(); if (find_flag == HA_READ_PREFIX_LAST) { /* @@ -8171,8 +8090,18 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, const rocksdb::Slice &rkey = m_scan_it->key(); const rocksdb::Slice &value = m_scan_it->value(); - bool covered_lookup = - m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap); +#ifndef DBUG_OFF + bool save_keyread_only = m_keyread_only; +#endif + DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; }); + + bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) || + kd.covers_lookup(&value, &m_lookup_bitmap); + +#ifndef DBUG_OFF + m_keyread_only = save_keyread_only; +#endif + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); @@ -8180,12 +8109,11 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, rc = HA_ERR_ROCKSDB_CORRUPT_DATA; } else { rc = kd.unpack_record(table, buf, &rkey, &value, - m_verify_row_debug_checksums); + m_converter->get_verify_row_debug_checksums()); global_stats.covered_secondary_key_lookups.inc(); } } else { - if (kd.m_is_reverse_cf) - move_forward = !move_forward; + if (kd.m_is_reverse_cf) move_forward = !move_forward; rc = find_icp_matching_index_rec(move_forward, buf); if (!rc) { @@ -8232,11 +8160,12 @@ int 
ha_rocksdb::read_row_from_secondary_key(uchar *const buf, ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const { DBUG_ENTER_FUNC(); - ulong base_flags = HA_READ_NEXT | // doesn't seem to be used + ulong base_flags = HA_READ_NEXT | // doesn't seem to be used HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV; - if (check_keyread_allowed(inx, part, all_parts)) + if (check_keyread_allowed(inx, part, all_parts)) { base_flags |= HA_KEYREAD_ONLY; + } if (inx == table_share->primary_key) { /* @@ -8292,13 +8221,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { rocksdb::Slice value = m_scan_it->value(); bool covered_lookup = - m_keyread_only || m_key_descr_arr[keyno]->covers_lookup( - table, &value, &m_lookup_bitmap); + (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) || + m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap); if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { rc = m_key_descr_arr[keyno]->unpack_record( - table, buf, &key, &value, m_verify_row_debug_checksums); + table, buf, &key, &value, + m_converter->get_verify_row_debug_checksums()); global_stats.covered_secondary_key_lookups.inc(); } else { + DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk"); rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); } @@ -8348,9 +8279,10 @@ int ha_rocksdb::read_range_first(const key_range *const start_key, range_key_part = table->key_info[active_index].key_part; - if (!start_key) // Read first record + if (!start_key) { + // Read first record result = ha_index_first(table->record[0]); - else { + } else { #ifdef MARIAROCKS_NOT_YET if (is_using_prohibited_gap_locks( is_using_full_unique_key(active_index, start_key->keypart_map, @@ -8366,8 +8298,9 @@ int ha_rocksdb::read_range_first(const key_range *const start_key, start_key->keypart_map, start_key->flag, end_key); }) } - if (result) + if (result) { DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? 
HA_ERR_END_OF_FILE : result); + } if (compare_key(end_range) <= 0) { DBUG_RETURN(HA_EXIT_SUCCESS); @@ -8444,14 +8377,23 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, const key_range *end_key) { DBUG_ENTER_FUNC(); + DBUG_EXECUTE_IF("myrocks_busy_loop_on_row_read", int debug_i = 0; + while (1) { debug_i++; }); + int rc = 0; + THD *thd = ha_thd(); + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + DBUG_RETURN(rc); + } + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; const uint actual_key_parts = kd.get_key_parts(); bool using_full_key = is_using_full_key(keypart_map, actual_key_parts); - if (!end_key) - end_key = end_range; + if (!end_key) end_key = end_range; /* By default, we don't need the retrieved records to match the prefix */ m_sk_match_prefix = nullptr; @@ -8468,8 +8410,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, m_pk_packed_tuple, key, keypart_map); bool skip_lookup = is_blind_delete_enabled(); - rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, - skip_lookup, false); + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false); if (!rc && !skip_lookup) { #ifdef MARIAROCKS_NOT_YET @@ -8494,8 +8435,9 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, tmp_map); if (table->key_info[active_index].user_defined_key_parts != - kd.get_key_parts()) + kd.get_key_parts()) { using_full_key = false; + } } else { packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, key, keypart_map); @@ -8549,14 +8491,20 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, bool use_all_keys = false; if (find_flag == HA_READ_KEY_EXACT && - my_count_bits(keypart_map) == kd.get_key_parts()) + my_count_bits(keypart_map) == kd.get_key_parts()) { use_all_keys = true; + } 
Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi_scan"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } /* This will open the iterator and position it at a record that's equal or greater than the lookup tuple. @@ -8573,9 +8521,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, slice, &move_forward, tx->m_snapshot_timestamp); if (rc) { - /* This status is returned on any error */ - table->status = STATUS_NOT_FOUND; - DBUG_RETURN(rc); + break; } m_skip_scan_it_next_call = false; @@ -8585,13 +8531,15 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, then we have all the rows we need. For a secondary key we now need to lookup the primary key. */ - if (active_index == table->s->primary_key) + if (active_index == table->s->primary_key) { rc = read_row_from_primary_key(buf); - else + } else { rc = read_row_from_secondary_key(buf, kd, move_forward); + } - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) + if (!should_recreate_snapshot(rc, is_new_snapshot)) { break; /* Exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -8599,7 +8547,10 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, } if (rc) { - /* the only possible error condition is record-not-found */ + /* + This status is returned on any error + the only possible error condition is record-not-found + */ table->status = STATUS_NOT_FOUND; } else { table->status = 0; @@ -8633,13 +8584,21 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, other - error code */ -int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, +int 
ha_rocksdb::find_icp_matching_index_rec(const bool move_forward, uchar *const buf) { if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) { const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + THD *thd = ha_thd(); while (1) { - rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + int rc = rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } if (!is_valid(m_scan_it)) { table->status = STATUS_NOT_FOUND; @@ -8663,7 +8622,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, const rocksdb::Slice value = m_scan_it->value(); int err = kd.unpack_record(table, buf, &rkey, &value, - m_verify_row_debug_checksums); + m_converter->get_verify_row_debug_checksums()); if (err != HA_EXIT_SUCCESS) { return err; } @@ -8684,14 +8643,6 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, * row. */ break; } - /* - TODO: should we have this here, or RockDB handles this internally? 
- if (my_core::thd_killed(current_thd)) - { - rc= HA_ERR_INTERNAL_ERROR; // doesn't matter - break; - } - */ } } return HA_EXIT_SUCCESS; @@ -8723,14 +8674,14 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { // Rdb_transaction *tx= get_or_create_tx(table->in_use); // tx->snapshot= nullptr; - const bool save_verify_row_debug_checksums = m_verify_row_debug_checksums; - m_verify_row_debug_checksums = true; - + bool save_verify_row_debug_checksums = + m_converter->get_verify_row_debug_checksums(); + m_converter->set_verify_row_debug_checksums(true); /* For each secondary index, check that we can get a PK value from it */ // NO_LINT_DEBUG sql_print_verbose_info("CHECKTABLE %s: Checking table %s", table_name, table_name); - ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true + ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true ha_rows row_checksums = ha_rows(-1); bool first_index = true; @@ -8740,20 +8691,21 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { ha_index_init(keyno, true); ha_rows rows = 0; ha_rows checksums = 0; - if (first_index) - row_checksums_at_start = m_row_checksums_checked; + if (first_index) { + row_checksums_at_start = m_converter->get_row_checksums_checked(); + } int res; // NO_LINT_DEBUG sql_print_verbose_info("CHECKTABLE %s: Checking index %s", table_name, table->key_info[keyno].name); while (1) { - if (!rows) + if (!rows) { res = index_first(table->record[0]); - else + } else { res = index_next(table->record[0]); + } - if (res == HA_ERR_END_OF_FILE) - break; + if (res == HA_ERR_END_OF_FILE) break; if (res) { // error // NO_LINT_DEBUG @@ -8774,16 +8726,18 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(), rowkey_copy.length()))) { // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: .. 
row %lld: " - "failed to fetch row by rowid", - table_name, rows); + sql_print_error( + "CHECKTABLE %s: .. row %lld: " + "failed to fetch row by rowid", + table_name, rows); goto error; } longlong hidden_pk_id = 0; if (has_hidden_pk(table) && - read_hidden_pk_id_from_rowkey(&hidden_pk_id)) + read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { goto error; + } /* Check if we get the same PK value */ uint packed_size = m_pk_descr->pack_record( @@ -8804,9 +8758,10 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { if (packed_size != sec_key_copy.length() || memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) { // NO_LINT_DEBUG - sql_print_error("CHECKTABLE %s: .. row %lld: " - "secondary index value mismatch", - table_name, rows); + sql_print_error( + "CHECKTABLE %s: .. row %lld: " + "secondary index value mismatch", + table_name, rows); goto print_and_error; } rows++; @@ -8838,7 +8793,8 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { table_name, rows, checksums); if (first_index) { - row_checksums = m_row_checksums_checked - row_checksums_at_start; + row_checksums = + m_converter->get_row_checksums_checked() - row_checksums_at_start; first_index = false; } ha_index_end(); @@ -8851,16 +8807,16 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { } extra(HA_EXTRA_NO_KEYREAD); - m_verify_row_debug_checksums = save_verify_row_debug_checksums; + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); /* - TODO: we should check also for PK records that are missing in the secondary - indexes. + TODO(sergiy): we should check also for PK records that are missing in + the secondary indexes. For that, need to walk through the PK and check that every PK record has a proper counterpart in each secondary index. 
*/ DBUG_RETURN(HA_ADMIN_OK); error: - m_verify_row_debug_checksums = save_verify_row_debug_checksums; + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); ha_index_or_rnd_end(); extra(HA_EXTRA_NO_KEYREAD); @@ -8870,10 +8826,11 @@ error: static void dbug_dump_str(FILE *const out, const char *const str, int len) { fprintf(out, "\""); for (int i = 0; i < len; i++) { - if (str[i] > 32) + if (str[i] > 32) { fprintf(out, "%c", str[i]); - else + } else { fprintf(out, "\\%d", str[i]); + } } fprintf(out, "\""); } @@ -8886,8 +8843,7 @@ static void dbug_dump_str(FILE *const out, const char *const str, int len) { void dbug_dump_database(rocksdb::DB *const db) { FILE *const out = fopen("/tmp/rocksdb.dump", "wt"); - if (!out) - return; + if (!out) return; rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions()); for (it->SeekToFirst(); it->Valid(); it->Next()) { @@ -8909,30 +8865,28 @@ rocksdb::Status ha_rocksdb::get_for_update( DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); const bool exclusive = m_lock_rows != RDB_LOCK_READ; - rocksdb::Status s = tx->get_for_update(column_family, key, value, exclusive); - - // If we have a lock conflict and we are running in READ COMMITTTED mode - // release and reacquire the snapshot and then retry the get_for_update(). 
- if (s.IsBusy() && !s.IsDeadlock() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { - tx->release_snapshot(); - tx->acquire_snapshot(false); - - s = tx->get_for_update(column_family, key, value, exclusive); - } + const bool do_validate = + my_core::thd_tx_isolation(ha_thd()) > ISO_READ_COMMITTED; + rocksdb::Status s = + tx->get_for_update(column_family, key, value, exclusive, do_validate); +#ifndef DBUG_OFF + ++rocksdb_num_get_for_update_calls; +#endif return s; } -bool ha_rocksdb::is_blind_delete_enabled() -{ +bool ha_rocksdb::is_blind_delete_enabled() { THD *thd = ha_thd(); + /* + Note: in MariaDB, thd->lex->table_count is only set for multi-table DELETE, + not for single-table DELETE. So we check thd->lex->query_tables instead. + */ return (THDVAR(thd, blind_delete_primary_key) && thd->lex->sql_command == SQLCOM_DELETE && - thd->lex->table_count == 1 && + thd->lex->query_tables && !thd->lex->query_tables->next_global && table->s->keys == 1 && - !has_hidden_pk(table) && - !thd->rgi_slave); + !has_hidden_pk(table) && !thd->rgi_slave); } /* @@ -8960,8 +8914,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid"); DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", { THD *thd = ha_thd(); - const char act[] = "now signal Reached " - "wait_for signal.rocksdb.get_row_by_rowid_let_running"; + const char act[] = + "now signal Reached " + "wait_for signal.rocksdb.get_row_by_rowid_let_running"; DBUG_ASSERT(opt_debug_sync_timeout > 0); DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); };); @@ -8970,8 +8925,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, rocksdb::Status s; /* Pretend row found without looking up */ - if (skip_lookup) - { + if (skip_lookup) { #ifdef MARIAROCKS_NOT_YET stats.rows_deleted_blind++; #endif @@ -8984,6 +8938,17 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, if (m_lock_rows == 
RDB_LOCK_NONE) { tx->acquire_snapshot(true); s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record); + } else if (m_insert_with_update && m_dup_pk_found) { + DBUG_ASSERT(m_pk_descr->get_keyno() == m_dupp_errkey); + DBUG_ASSERT(m_dup_pk_retrieved_record.length() == + m_retrieved_record.size()); + DBUG_ASSERT(memcmp(m_dup_pk_retrieved_record.ptr(), + m_retrieved_record.data(), + m_retrieved_record.size()) == 0); + + // do nothing - we already have the result in m_retrieved_record and + // already taken the lock + s = rocksdb::Status::OK(); } else { s = get_for_update(tx, m_pk_descr->get_cf(), key_slice, &m_retrieved_record); @@ -9038,8 +9003,7 @@ int ha_rocksdb::index_next(uchar *const buf) { } int rc = index_next_with_direction(buf, moves_forward); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9058,8 +9022,7 @@ int ha_rocksdb::index_prev(uchar *const buf) { } int rc = index_next_with_direction(buf, moves_forward); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9072,19 +9035,33 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { if (active_index == pk_index(table, m_tbl_def)) { rc = rnd_next_with_direction(buf, move_forward); } else { - if (m_skip_scan_it_next_call) { - m_skip_scan_it_next_call = false; - } else { - if (move_forward) - m_scan_it->Next(); /* this call cannot fail */ - else - m_scan_it->Prev(); + THD *thd = ha_thd(); + for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_inwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { + if (move_forward) { + m_scan_it->Next(); /* this call cannot fail */ + } else { + m_scan_it->Prev(); + } + } + rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index], + m_scan_it, 
!move_forward); + if (rc != HA_EXIT_SUCCESS) { + break; + } + rc = find_icp_matching_index_rec(move_forward, buf); + if (!rc) rc = secondary_index_read(active_index, buf); + if (!should_skip_invalidated_record(rc)) { + break; + } } - rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it, - !move_forward); - rc = find_icp_matching_index_rec(move_forward, buf); - if (!rc) - rc = secondary_index_read(active_index, buf); } DBUG_RETURN(rc); @@ -9102,8 +9079,7 @@ int ha_rocksdb::index_first(uchar *const buf) { int rc = m_key_descr_arr[active_index]->m_is_reverse_cf ? index_last_intern(buf) : index_first_intern(buf); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9120,8 +9096,7 @@ int ha_rocksdb::index_last(uchar *const buf) { int rc = m_key_descr_arr[active_index]->m_is_reverse_cf ? index_first_intern(buf) : index_last_intern(buf); - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -9189,8 +9164,9 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { m_skip_scan_it_next_call = true; rc = index_next_with_direction(buf, true); - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) - break; // exit the loop + if (!should_recreate_snapshot(rc, is_new_snapshot)) { + break; /* exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -9264,11 +9240,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) { - kd.get_infimum_key(key, &key_size); - } else { - kd.get_supremum_key(key, &key_size); - } + int key_end_matching_bytes = kd.get_last_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -9279,7 +9251,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { // Loop as long 
as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes); m_scan_it->SeekForPrev(index_key); m_skip_scan_it_next_call = false; @@ -9288,12 +9260,12 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { rc = rnd_next_with_direction(buf, false); } else { rc = find_icp_matching_index_rec(false /*move_forward*/, buf); - if (!rc) - rc = secondary_index_read(active_index, buf); + if (!rc) rc = secondary_index_read(active_index, buf); } - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot) + if (!should_recreate_snapshot(rc, is_new_snapshot)) { break; /* exit the loop */ + } // release the snapshot and iterator so they will be regenerated tx->release_snapshot(); @@ -9332,7 +9304,7 @@ void ha_rocksdb::unlock_row() { - If the index is PRIMARY KEY, and if all of the columns of the table are covered by the PRIMARY KEY, SingleDelete can be used. */ -bool ha_rocksdb::can_use_single_delete(const uint &index) const { +bool ha_rocksdb::can_use_single_delete(const uint index) const { return (index != pk_index(table, m_tbl_def) || (!has_hidden_pk(table) && table->key_info[index].ext_key_parts == table->s->fields)); @@ -9348,12 +9320,18 @@ bool ha_rocksdb::skip_unique_check() const { 3) the user set unique_checks option to 0, and the table does not have any indexes. 
If the table has secondary keys, then those might becomes inconsisted/corrupted + 4) We're using read-free replication */ return THDVAR(table->in_use, bulk_load) || (m_force_skip_unique_check && m_skip_unique_check) || (my_core::thd_test_options(table->in_use, OPTION_RELAXED_UNIQUE_CHECKS) && - m_tbl_def->m_key_count == 1); + m_tbl_def->m_key_count == 1) || +#ifdef MARIAROCKS_NOT_YET + use_read_free_rpl(); +#else + FALSE; +#endif } #ifdef MARIAROCKS_NOT_YET // MDEV-10975 @@ -9453,10 +9431,9 @@ const char *ha_rocksdb::get_key_comment(const uint index, return table_arg->key_info[index].comment.str; } -const std::string ha_rocksdb::generate_cf_name(const uint index, - const TABLE *const table_arg, - const Rdb_tbl_def *const tbl_def_arg, - bool *per_part_match_found) { +const std::string ha_rocksdb::generate_cf_name( + const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(tbl_def_arg != nullptr); DBUG_ASSERT(per_part_match_found != nullptr); @@ -9501,6 +9478,9 @@ const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) { } /** + Write a new row + + @param[in] buf new row data to write @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) @@ -9523,6 +9503,11 @@ int ha_rocksdb::write_row(uchar *const buf) { } } + // clear cache at beginning of write for INSERT ON DUPLICATE + // we may get multiple write->fail->read->update if there are multiple + // values from INSERT + m_dup_pk_found = false; + const int rv = update_write_row(nullptr, buf, skip_unique_check()); if (rv == 0) { @@ -9555,6 +9540,15 @@ void ha_rocksdb::set_last_rowkey(const uchar *const old_data) { #endif } +/** + Collect update data for primary key + + @param[in, out] row_info hold all data for update row, such as + new row data/old row data + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ int 
ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { int size; @@ -9567,10 +9561,9 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { row_info->new_pk_unpack_info = &m_pk_unpack_info; - size = - m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data, - m_pk_packed_tuple, row_info->new_pk_unpack_info, - false, 0, 0, nullptr, &row_info->ttl_pk_offset); + size = m_pk_descr->pack_record( + table, m_pack_buffer, row_info->new_data, m_pk_packed_tuple, + row_info->new_pk_unpack_info, false, 0, 0, nullptr); } else if (row_info->old_data == nullptr) { row_info->hidden_pk_id = update_hidden_pk_val(); size = @@ -9594,30 +9587,32 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { return HA_EXIT_SUCCESS; } -int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, +/** + Check the specified primary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether the primary key exists before. + @param[out] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_unique_pk(const uint key_id, const struct update_row_info &row_info, - bool *const found, - bool *const pk_changed) { + bool *const found) { DBUG_ASSERT(found != nullptr); - DBUG_ASSERT(pk_changed != nullptr); - *pk_changed = false; + DBUG_ASSERT(row_info.old_pk_slice.size() == 0 || + row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0); - /* - For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs - always require locking. 
- */ - if (row_info.old_pk_slice.size() > 0) { - /* - If the keys are the same, then no lock is needed - */ - if (!row_info.new_pk_slice.compare(row_info.old_pk_slice)) { - *found = false; - return HA_EXIT_SUCCESS; - } - - *pk_changed = true; - } + /* Ignore PK violations if this is an optimized 'replace into' */ +#ifdef MARIAROCKS_NOT_YET + const bool ignore_pk_unique_check = ha_thd()->lex->blind_replace_into; +#else + const bool ignore_pk_unique_check= false; +#endif /* Perform a read to determine if a duplicate entry exists. For primary @@ -9640,17 +9635,56 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id, */ const rocksdb::Status s = get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice, - &m_retrieved_record); + ignore_pk_unique_check ? nullptr : &m_retrieved_record); if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error( table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler); } - *found = !s.IsNotFound(); + bool key_found = ignore_pk_unique_check ? false : !s.IsNotFound(); + + /* + If the pk key has ttl, we may need to pretend the row wasn't + found if it is already expired. + */ + if (key_found && m_pk_descr->has_ttl() && + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, + (row_info.tx->m_snapshot_timestamp + ? row_info.tx->m_snapshot_timestamp + : static_cast<int64_t>(std::time(nullptr))))) { + key_found = false; + } + + if (key_found && row_info.old_data == nullptr && m_insert_with_update) { + // In INSERT ON DUPLICATE KEY UPDATE ...
case, if the insert failed + // due to a duplicate key, remember the last key and skip the check + // next time + m_dup_pk_found = true; + +#ifndef DBUG_OFF + // save it for sanity checking later + m_dup_pk_retrieved_record.copy(m_retrieved_record.data(), + m_retrieved_record.size(), &my_charset_bin); +#endif + } + + *found = key_found; + return HA_EXIT_SUCCESS; } -int ha_rocksdb::check_and_lock_sk(const uint &key_id, +/** + Check the specified secondary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether specified key value exists before. + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_sk(const uint key_id, const struct update_row_info &row_info, bool *const found) { DBUG_ASSERT(found != nullptr); @@ -9780,8 +9814,18 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, return HA_EXIT_SUCCESS; } +/** + Enumerate all keys to check their uniqueness and also lock it + + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ int ha_rocksdb::check_uniqueness_and_lock( - const struct update_row_info &row_info, bool *const pk_changed) { + const struct update_row_info &row_info, bool pk_changed) { /* Go through each index and determine if the index has uniqueness requirements. If it does, then try to obtain a row lock on the new values.
@@ -9793,7 +9837,12 @@ int ha_rocksdb::check_uniqueness_and_lock( int rc; if (is_pk(key_id, table, m_tbl_def)) { - rc = check_and_lock_unique_pk(key_id, row_info, &found, pk_changed); + if (row_info.old_pk_slice.size() > 0 && !pk_changed) { + found = false; + rc = HA_EXIT_SUCCESS; + } else { + rc = check_and_lock_unique_pk(key_id, row_info, &found); + } } else { rc = check_and_lock_sk(key_id, row_info, &found); } @@ -9802,23 +9851,11 @@ int ha_rocksdb::check_uniqueness_and_lock( return rc; } - /* - If the pk key has ttl, we may need to pretend the row wasn't - found if it is already expired. The pk record is read into - m_retrieved_record by check_and_lock_unique_pk(). - */ - if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, - (row_info.tx->m_snapshot_timestamp - ? row_info.tx->m_snapshot_timestamp - : static_cast<int64_t>(std::time(nullptr))))) { - found = false; - } - if (found) { /* There is a row with this key already, so error out. */ errkey = key_id; m_dupp_errkey = errkey; + return HA_ERR_FOUND_DUPP_KEY; } } @@ -9826,19 +9863,31 @@ int ha_rocksdb::check_uniqueness_and_lock( return HA_EXIT_SUCCESS; } +/** + Check whether secondary key value is duplicate or not + + @param[in] table_arg the table currently working on + @param[in] key_def the key_def is being checked + @param[in] key secondary key storage data + @param[out] sk_info hold secondary key memcmp datas(new/old) + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg, - const Rdb_key_def &index, + const Rdb_key_def &key_def, const rocksdb::Slice *key, struct unique_sk_buf_info *sk_info) { uint n_null_fields = 0; - const rocksdb::Comparator *index_comp = index.get_cf()->GetComparator(); + const rocksdb::Comparator *index_comp = key_def.get_cf()->GetComparator(); /* Get proper SK buffer.
*/ uchar *sk_buf = sk_info->swap_and_get_sk_buf(); /* Get memcmp form of sk without extended pk tail */ uint sk_memcmp_size = - index.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); + key_def.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); sk_info->sk_memcmp_key = rocksdb::Slice(reinterpret_cast<char *>(sk_buf), sk_memcmp_size); @@ -9858,12 +9907,17 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, const rocksdb::Slice &value, bool sort) { DBUG_ENTER_FUNC(); int res; + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); // In the case of unsorted inserts, m_sst_info allocated here is not // used to store the keys. It is still used to indicate when tables // are switched. - if (m_sst_info == nullptr || m_sst_info->is_committed()) { + if (m_sst_info == nullptr || m_sst_info->is_done()) { m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, kd.get_name(), cf, *rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api))); @@ -9896,17 +9950,61 @@ int ha_rocksdb::finalize_bulk_load(bool print_client_error) { /* Skip if there are no possible ongoing bulk loads */ if (m_sst_info) { - res = m_sst_info->commit(print_client_error); + if (m_sst_info->is_done()) { + m_sst_info.reset(); + DBUG_RETURN(res); + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + + // Wrap up the current work in m_sst_info and get ready to commit + // This transfers the responsibility of commit over to commit_info + res = m_sst_info->finish(&commit_info, print_client_error); + if (res == 0) { + // Make sure we have work to do - under race condition we could lose + // to another thread and end up with no work + if (commit_info.has_work()) { + rocksdb::IngestExternalFileOptions opts; + opts.move_files = true; + opts.snapshot_consistency = false; + opts.allow_global_seqno = false; + opts.allow_blocking_flush = false; + + const rocksdb::Status s =
rdb->IngestExternalFile( + commit_info.get_cf(), commit_info.get_committed_files(), opts); + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + res = HA_ERR_ROCKSDB_BULK_LOAD; + } else { + // Mark the list of SST files as committed, otherwise they'll get + // cleaned up when commit_info destructs + commit_info.commit(); + } + } + } m_sst_info.reset(); } DBUG_RETURN(res); } -int ha_rocksdb::update_pk(const Rdb_key_def &kd, - const struct update_row_info &row_info, - const bool &pk_changed) { - const uint key_id = kd.get_keyno(); - const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); +/** + Update an existing primary key record or write a new primary key record + + @param[in] kd the primary key is being update/write + @param[in] update_row_info hold all row data, such as old row data and + new row data + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + bool pk_changed) { + uint key_id = kd.get_keyno(); + bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); ulonglong bytes_written = 0; /* @@ -9934,7 +10032,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, int rc = HA_EXIT_SUCCESS; rocksdb::Slice value_slice; /* Prepare the new record to be written into RocksDB */ - if ((rc = convert_record_to_storage_format(row_info, &value_slice))) { + if ((rc = m_converter->encode_value_slice( + m_pk_descr, row_info.new_pk_slice, row_info.new_pk_unpack_info, + !row_info.old_pk_slice.empty(), should_store_row_debug_checksums(), + m_ttl_bytes, &m_ttl_bytes_updated, &value_slice))) { return rc; } @@ -9954,7 +10055,9 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice, value_slice); } else { - const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice); + const 
bool assume_tracked = can_assume_tracked(ha_thd()); + const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice, + assume_tracked); if (!s.ok()) { if (s.IsBusy()) { errkey = table->s->primary_key; @@ -9974,9 +10077,22 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, return rc; } -int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, - const struct update_row_info &row_info, - const bool bulk_load_sk) { +/** + update an existing secondary key record or write a new secondary key record + + @param[in] table_arg Table we're working on + @param[in] kd The secondary key being update/write + @param[in] row_info data structure contains old row data and new row data + @param[in] bulk_load_sk whether support bulk load. Currently it is only + support for write + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_sk(const TABLE *const table_arg, + const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) { int new_packed_size; int old_packed_size; int rc = HA_EXIT_SUCCESS; @@ -9998,19 +10114,18 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, return HA_EXIT_SUCCESS; } - const bool store_row_debug_checksums = should_store_row_debug_checksums(); - + bool store_row_debug_checksums = should_store_row_debug_checksums(); new_packed_size = kd.pack_record(table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums, - row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes); + row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes); if (row_info.old_data != nullptr) { // The old value old_packed_size = kd.pack_record( table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0, - nullptr, nullptr, m_ttl_bytes); + nullptr, m_ttl_bytes); /* Check if we are going to write the same value. 
This can happen when @@ -10070,13 +10185,22 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, return rc; } -int ha_rocksdb::update_indexes(const struct update_row_info &row_info, - const bool &pk_changed) { +/** + Update existing indexes(PK/SKs) or write new indexes(PK/SKs) + + @param[in] row_info hold all row data, such as old key/new key + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) { int rc; bool bulk_load_sk; // The PK must be updated first to pull out the TTL value. - rc = update_pk(*m_pk_descr, row_info, pk_changed); + rc = update_write_pk(*m_pk_descr, row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { return rc; } @@ -10091,7 +10215,8 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info, continue; } - rc = update_sk(table, *m_key_descr_arr[key_id], row_info, bulk_load_sk); + rc = update_write_sk(table, *m_key_descr_arr[key_id], row_info, + bulk_load_sk); if (rc != HA_EXIT_SUCCESS) { return rc; } @@ -10100,11 +10225,26 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info, return HA_EXIT_SUCCESS; } +/** + Update an existing row or write a new row + + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update + @param[in] skip_unique_check whether to check uniqueness + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ int ha_rocksdb::update_write_row(const uchar *const old_data, const uchar *const new_data, const bool skip_unique_check) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + bool pk_changed = false; struct update_row_info row_info; @@ -10112,7 +10252,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, row_info.new_data = 
new_data; row_info.skip_unique_check = skip_unique_check; row_info.new_pk_unpack_info = nullptr; - set_last_rowkey(old_data); row_info.tx = get_or_create_tx(table->in_use); @@ -10133,12 +10272,20 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, DBUG_RETURN(rc); } + /* + For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs + always require locking. + */ + if (row_info.old_pk_slice.size() > 0) { + pk_changed = row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0; + } + if (!skip_unique_check) { /* Check to see if we are going to have failures because of unique keys. Also lock the appropriate key values. */ - rc = check_uniqueness_and_lock(row_info, &pk_changed); + rc = check_uniqueness_and_lock(row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { DBUG_RETURN(rc); } @@ -10152,7 +10299,7 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, here because updates to the transaction will be made and those updates cannot be easily removed without rolling back the entire transaction. */ - rc = update_indexes(row_info, pk_changed); + rc = update_write_indexes(row_info, pk_changed); if (rc != HA_EXIT_SUCCESS) { DBUG_RETURN(rc); } @@ -10193,18 +10340,29 @@ void ha_rocksdb::setup_iterator_bounds( const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len, uchar *const lower_bound, uchar *const upper_bound, rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) { - uint min_len = std::min(eq_cond.size(), bound_len); - memcpy(upper_bound, eq_cond.data(), min_len); - kd.successor(upper_bound, min_len); - memcpy(lower_bound, eq_cond.data(), min_len); - kd.predecessor(lower_bound, min_len); + // If eq_cond is shorter than Rdb_key_def::INDEX_NUMBER_SIZE, we should be + // able to get better bounds just by using index id directly. 
+ if (eq_cond.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) { + DBUG_ASSERT(bound_len == Rdb_key_def::INDEX_NUMBER_SIZE); + uint size; + kd.get_infimum_key(lower_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + kd.get_supremum_key(upper_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + } else { + DBUG_ASSERT(bound_len <= eq_cond.size()); + memcpy(upper_bound, eq_cond.data(), bound_len); + kd.successor(upper_bound, bound_len); + memcpy(lower_bound, eq_cond.data(), bound_len); + kd.predecessor(lower_bound, bound_len); + } if (kd.m_is_reverse_cf) { - *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len); - *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len); + *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); } else { - *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len); - *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len); + *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); } } @@ -10223,8 +10381,17 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, bool skip_bloom = true; const rocksdb::Slice eq_cond(slice->data(), eq_cond_len); + // The size of m_scan_it_lower_bound (and upper) is technically + // max_packed_sk_len as calculated in ha_rocksdb::alloc_key_buffers. Rather + // than recalculating that number, we pass in the max of eq_cond_len and + // Rdb_key_def::INDEX_NUMBER_SIZE which is guaranteed to be smaller than + // max_packed_sk_len, hence ensuring no buffer overrun. + // + // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is + // used. 
if (check_bloom_and_set_bounds( - ha_thd(), kd, eq_cond, use_all_keys, eq_cond_len, + ha_thd(), kd, eq_cond, use_all_keys, + std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE), m_scan_it_lower_bound, m_scan_it_upper_bound, &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) { skip_bloom = false; @@ -10262,7 +10429,8 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, m_scan_it_snapshot = rdb->GetSnapshot(); auto read_opts = rocksdb::ReadOptions(); - read_opts.total_order_seek = true; // TODO: set based on WHERE conditions + // TODO(mung): set based on WHERE conditions + read_opts.total_order_seek = true; read_opts.snapshot = m_scan_it_snapshot; m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); } else { @@ -10287,12 +10455,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = + m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); - setup_scan_iterator(*m_pk_descr, &table_key, false, - key_start_matching_bytes); + setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } @@ -10305,8 +10473,16 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() { int ha_rocksdb::rnd_init(bool scan) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + Rdb_transaction *const tx = get_or_create_tx(table->in_use); - setup_read_decoders(); + + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); if (scan) { m_rnd_scan_is_new_snapshot = !tx->has_snapshot(); @@ -10334,9 +10510,9 @@ int ha_rocksdb::rnd_next(uchar *const buf) { int rc; for (;;) { rc = 
rnd_next_with_direction(buf, true); - if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !m_rnd_scan_is_new_snapshot) + if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) { break; /* exit the loop */ - + } // release the snapshot and iterator and then regenerate them Rdb_transaction *tx = get_or_create_tx(table->in_use); tx->release_snapshot(); @@ -10346,8 +10522,7 @@ int ha_rocksdb::rnd_next(uchar *const buf) { m_rnd_scan_is_new_snapshot = false; - if (rc == HA_ERR_KEY_NOT_FOUND) - rc = HA_ERR_END_OF_FILE; + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; DBUG_RETURN(rc); } @@ -10359,6 +10534,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { DBUG_ENTER_FUNC(); int rc; + THD *thd = ha_thd(); table->status = STATUS_NOT_FOUND; #ifdef MARIAROCKS_NOT_YET @@ -10377,13 +10553,20 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { } for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rnwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + if (m_skip_scan_it_next_call) { m_skip_scan_it_next_call = false; } else { - if (move_forward) + if (move_forward) { m_scan_it->Next(); /* this call cannot fail */ - else + } else { m_scan_it->Prev(); /* this call cannot fail */ + } } if (!is_valid(m_scan_it)) { @@ -10417,11 +10600,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { const rocksdb::Status s = get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record); if (s.IsNotFound() && - my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) { - // This occurs if we accessed a row, tried to lock it, failed, - // released and reacquired the snapshot (because of READ COMMITTED - // mode) and the row was deleted by someone else in the meantime. - // If so, we just want to move on to the next row. 
+ should_skip_invalidated_record(HA_ERR_KEY_NOT_FOUND)) { continue; } @@ -10481,11 +10660,18 @@ int ha_rocksdb::rnd_end() { int ha_rocksdb::index_init(uint idx, bool sorted) { DBUG_ENTER_FUNC(); + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); - setup_read_decoders(); - m_start_range= NULL; + m_start_range= NULL; + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); if (!m_keyread_only) { m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); @@ -10529,10 +10715,20 @@ int ha_rocksdb::truncate() { DBUG_ASSERT(m_tbl_def != nullptr); - m_tbl_def->m_auto_incr_val = 1; - m_tbl_def->m_hidden_pk_val = 1; + // Save table name to use later + std::string table_name = m_tbl_def->full_tablename(); - DBUG_RETURN(remove_rows(m_tbl_def)); + // Delete current table + int err = delete_table(m_tbl_def); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + // Reset auto_increment_value to 1 if auto-increment feature is enabled + // By default, the starting valid value for auto_increment_value is 1 + DBUG_RETURN(create_table( + table_name, table, + table->found_next_number_field ? 
1 : 0 /* auto_increment_value */)); } /* @@ -10610,9 +10806,11 @@ rocksdb::Status ha_rocksdb::delete_or_singledelete( uint index, Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key) { - if (can_use_single_delete(index)) - return tx->single_delete(column_family, key); - return tx->delete_key(column_family, key); + const bool assume_tracked = can_assume_tracked(ha_thd()); + if (can_use_single_delete(index)) { + return tx->single_delete(column_family, key, assume_tracked); + } + return tx->delete_key(column_family, key, assume_tracked); } void ha_rocksdb::update_stats(void) { @@ -10643,8 +10841,9 @@ void ha_rocksdb::update_stats(void) { int ha_rocksdb::info(uint flag) { DBUG_ENTER_FUNC(); - if (!table) + if (!table) { DBUG_RETURN(HA_EXIT_FAILURE); + } if (flag & HA_STATUS_VARIABLE) { /* @@ -10672,10 +10871,8 @@ int ha_rocksdb::info(uint flag) { // if number of records is hardcoded, we do not want to force computation // of memtable cardinalities - if (stats.records == 0 || - (rocksdb_force_compute_memtable_stats && - rocksdb_debug_optimizer_n_rows == 0)) - { + if (stats.records == 0 || (rocksdb_force_compute_memtable_stats && + rocksdb_debug_optimizer_n_rows == 0)) { // First, compute SST files stats uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; auto r = get_range(pk_index(table, m_tbl_def), buf); @@ -10685,8 +10882,8 @@ int ha_rocksdb::info(uint flag) { if (stats.records == 0) { rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz, include_flags); - stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; - stats.data_file_length+= sz; + stats.records += sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + stats.data_file_length += sz; } // Second, compute memtable stats. This call is expensive, so cache // values computed for some time. 
@@ -10723,14 +10920,17 @@ int ha_rocksdb::info(uint flag) { stats.records += m_table_handler->m_mtcache_count; stats.data_file_length += m_table_handler->m_mtcache_size; } - - if (rocksdb_debug_optimizer_n_rows > 0) - stats.records = rocksdb_debug_optimizer_n_rows; } - if (stats.records != 0) + if (rocksdb_debug_optimizer_n_rows > 0) { + stats.records = rocksdb_debug_optimizer_n_rows; + } + + if (stats.records != 0) { stats.mean_rec_length = stats.data_file_length / stats.records; + } } + if (flag & HA_STATUS_CONST) { ref_length = m_pk_descr->max_storage_fmt_length(); @@ -10756,8 +10956,7 @@ int ha_rocksdb::info(uint flag) { } else { x = 0; } - if (x > stats.records) - x = stats.records; + if (x > stats.records) x = stats.records; if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) || rocksdb_debug_optimizer_n_rows > 0) { // Fake cardinality implementation. For example, (idx1, idx2, idx3) @@ -10792,7 +10991,7 @@ int ha_rocksdb::info(uint flag) { uniqueness violation. */ errkey = m_dupp_errkey; - dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. + dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. } if (flag & HA_STATUS_AUTO) { @@ -10806,8 +11005,9 @@ void ha_rocksdb::position(const uchar *const record) { DBUG_ENTER_FUNC(); longlong hidden_pk_id = 0; - if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) - DBUG_ASSERT(false); // should never reach here + if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { + DBUG_ASSERT(false); // should never reach here + } /* Get packed primary key value from the record. @@ -10829,8 +11029,9 @@ void ha_rocksdb::position(const uchar *const record) { It could be that mem-comparable form of PK occupies less than ref_length bytes. Fill the remainder with zeros. 
*/ - if (ref_length > packed_size) + if (ref_length > packed_size) { memset(ref + packed_size, 0, ref_length - packed_size); + } DBUG_VOID_RETURN; } @@ -10888,8 +11089,7 @@ void ha_rocksdb::calc_updated_indexes() { /* Walk over all key parts, including the "extended key" suffix */ const uint key_parts = kd.get_key_parts(); for (uint kp = 0; kp < key_parts; kp++) { - if (has_hidden_pk(table) && kp + 1 == key_parts) - break; + if (has_hidden_pk(table) && kp + 1 == key_parts) break; Field *const field = kd.get_table_field_for_part_no(table, kp); if (bitmap_is_set(table->write_set, field->field_index)) { @@ -10902,6 +11102,9 @@ void ha_rocksdb::calc_updated_indexes() { } /** + Update an existing row + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update @return HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) @@ -10920,7 +11123,7 @@ int ha_rocksdb::update_row(const uchar *const old_data, */ DBUG_ASSERT(new_data == table->record[0]); - const int rv = update_write_row(old_data, new_data, false); + const int rv = update_write_row(old_data, new_data, skip_unique_check()); if (rv == 0) { #ifdef MARIAROCKS_NOT_YET @@ -11025,7 +11228,8 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to, void ha_rocksdb::read_thd_vars(THD *const thd) { m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums); - m_verify_row_debug_checksums = THDVAR(thd, verify_row_debug_checksums); + m_converter->set_verify_row_debug_checksums( + THDVAR(thd, verify_row_debug_checksums)); m_checksums_pct = THDVAR(thd, checksums_pct); } @@ -11116,7 +11320,8 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { tx->io_perf_end_and_record(&m_io_perf); tx->m_n_mysql_tables_in_use--; if (tx->m_n_mysql_tables_in_use == 0 && - !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + !my_core::thd_test_options(thd, + OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { /* Do like InnoDB: when we 
get here, it's time to commit a single-statement transaction. @@ -11248,9 +11453,8 @@ rocksdb::Range get_range(const Rdb_key_def &kd, } } -rocksdb::Range -ha_rocksdb::get_range(const int &i, - uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { +rocksdb::Range ha_rocksdb::get_range( + const int i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { return myrocks::get_range(*m_key_descr_arr[i], buf); } @@ -11261,11 +11465,10 @@ ha_rocksdb::get_range(const int &i, but in drop_index_thread's case, it means index is marked as removed, so no further seek will happen for the index id. */ -static bool is_myrocks_index_empty( - rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf, - const rocksdb::ReadOptions &read_opts, - const uint index_id) -{ +static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh, + const bool is_reverse_cf, + const rocksdb::ReadOptions &read_opts, + const uint index_id) { bool index_removed = false; uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rdb_netbuf_store_uint32(key_buf, index_id); @@ -11276,8 +11479,7 @@ static bool is_myrocks_index_empty( if (!it->Valid()) { index_removed = true; } else { - if (memcmp(it->key().data(), key_buf, - Rdb_key_def::INDEX_NUMBER_SIZE)) { + if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) { // Key does not have same prefix index_removed = true; } @@ -11304,8 +11506,8 @@ void Rdb_drop_index_thread::run() { timespec ts; int sec= dict_manager.is_drop_index_empty() - ? 24 * 60 * 60 // no filtering - : 60; // filtering + ? 
24 * 60 * 60 // no filtering + : 60; // filtering set_timespec(ts,sec); const auto ret MY_ATTRIBUTE((__unused__)) = @@ -11322,26 +11524,23 @@ void Rdb_drop_index_thread::run() { if (!indices.empty()) { std::unordered_set finished; rocksdb::ReadOptions read_opts; - read_opts.total_order_seek = true; // disable bloom filter + read_opts.total_order_seek = true; // disable bloom filter for (const auto d : indices) { uint32 cf_flags = 0; if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) { - sql_print_error("RocksDB: Failed to get column family flags " - "from cf id %u. MyRocks data dictionary may " - "get corrupted.", - d.cf_id); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Failed to get column family flags " + "from cf id %u. MyRocks data dictionary may " + "get corrupted.", + d.cf_id); abort(); } rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); DBUG_ASSERT(cfh); const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG; - if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) - { - finished.insert(d); - continue; - } uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0, is_reverse_cf ? 
0 : 1); @@ -11361,8 +11560,7 @@ void Rdb_drop_index_thread::run() { } rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); } - if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) - { + if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) { finished.insert(d); } } @@ -11388,6 +11586,44 @@ Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) { return ddl_manager.find(str); } +/* + Overload func for delete table ---it deletes table meta data in data + dictionary immediately and delete real data in background thread(async) + + @param tbl IN MyRocks table definition + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(tbl != nullptr); + DBUG_ASSERT(m_tbl_def == nullptr || m_tbl_def == tbl); + + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + + dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch); + + /* + Remove the table entry in data dictionary (this will also remove it from + the persistent data dictionary). + */ + ddl_manager.remove(tbl, batch, true); + + int err = dict_manager.commit(batch); + if (err) { + DBUG_RETURN(err); + } + + rdb_drop_idx_thread.signal(); + // avoid dangling pointer + m_tbl_def = nullptr; + DBUG_RETURN(HA_EXIT_SUCCESS); +} + /* Note: the following function is called when the table is not open. That is, this->table==nullptr, pk_key_descr==nullptr, etc. 
@@ -11404,29 +11640,13 @@ int ha_rocksdb::delete_table(const char *const tablename) { DBUG_ASSERT(tablename != nullptr); - const std::unique_ptr wb = dict_manager.begin(); - rocksdb::WriteBatch *const batch = wb.get(); - /* Find the table in the hash */ Rdb_tbl_def *const tbl = get_table_if_exists(tablename); - if (!tbl) + if (!tbl) { DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); - dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch); - - /* - Remove the table entry in data dictionary (this will also remove it from - the persistent data dictionary). - */ - ddl_manager.remove(tbl, batch, true); - - int err = dict_manager.commit(batch); - if (err) { - DBUG_RETURN(err); } - rdb_drop_idx_thread.signal(); - - DBUG_RETURN(HA_EXIT_SUCCESS); + DBUG_RETURN(delete_table(tbl)); } int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { @@ -11579,21 +11799,33 @@ int ha_rocksdb::extra(enum ha_extra_function operation) { DBUG_ENTER_FUNC(); switch (operation) { - case HA_EXTRA_KEYREAD: - m_keyread_only = true; - break; - case HA_EXTRA_NO_KEYREAD: - m_keyread_only = false; - break; - case HA_EXTRA_FLUSH: - /* - If the table has blobs, then they are part of m_retrieved_record. - This call invalidates them. - */ - m_retrieved_record.Reset(); - break; - default: - break; + case HA_EXTRA_KEYREAD: + m_keyread_only = true; + break; + case HA_EXTRA_NO_KEYREAD: + m_keyread_only = false; + break; + case HA_EXTRA_FLUSH: + /* + If the table has blobs, then they are part of m_retrieved_record. + This call invalidates them. 
+ */ + m_retrieved_record.Reset(); + break; + case HA_EXTRA_INSERT_WITH_UPDATE: + // INSERT ON DUPLICATE KEY UPDATE + if (rocksdb_enable_insert_with_update_caching) { + m_insert_with_update = true; + } + break; + case HA_EXTRA_NO_IGNORE_DUP_KEY: + // PAIRED with HA_EXTRA_INSERT_WITH_UPDATE or HA_EXTRA_WRITE_CAN_REPLACE + // that indicates the end of REPLACE / INSERT ON DUPLICATE KEY + m_insert_with_update = false; + break; + + default: + break; } DBUG_RETURN(HA_EXIT_SUCCESS); @@ -11664,8 +11896,7 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *const min_key, uint64_t sz = 0; auto disk_size = kd.m_stats.m_actual_disk_size; - if (disk_size == 0) - disk_size = kd.m_stats.m_data_size; + if (disk_size == 0) disk_size = kd.m_stats.m_data_size; auto rows = kd.m_stats.m_rows; if (rows == 0 || disk_size == 0) { rows = 1; @@ -11776,7 +12007,7 @@ static int calculate_stats( // get RocksDB table properties for these ranges rocksdb::TablePropertiesCollection props; - for (auto it : ranges) { + for (const auto &it : ranges) { const auto old_size MY_ATTRIBUTE((__unused__)) = props.size(); const auto status = rdb->GetPropertiesOfTablesInRange( it.first, &it.second[0], it.second.size(), &props); @@ -11966,73 +12197,77 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, // The next value can be more complicated if either 'inc' or 'off' is not 1 ulonglong last_val = auto_incr; - // Loop until we can correctly update the atomic value - do { - DBUG_ASSERT(last_val > 0); - // Calculate the next value in the auto increment series: offset - // + N * increment where N is 0, 1, 2, ... - // - // For further information please visit: - // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html - // - // The following is confusing so here is an explanation: - // To get the next number in the sequence above you subtract out the - // offset, calculate the next sequence (N * increment) and then add the - // offset back in. 
- // - // The additions are rearranged to avoid overflow. The following is - // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact - // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: - // - // (a+b)/c - // = (a - a%c + a%c + b - b%c + b%c) / c - // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c - // = a/c + b/c + (a%c + b%c) / c - // - // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the - // following statement. - ulonglong n = - (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; - - // Check if n * inc + off will overflow. This can only happen if we have - // an UNSIGNED BIGINT field. - if (n > (std::numeric_limits::max() - off) / inc) { - DBUG_ASSERT(max_val == std::numeric_limits::max()); - // The 'last_val' value is already equal to or larger than the largest - // value in the sequence. Continuing would wrap around (technically - // the behavior would be undefined). What should we do? - // We could: - // 1) set the new value to the last possible number in our sequence - // as described above. The problem with this is that this - // number could be smaller than a value in an existing row. - // 2) set the new value to the largest possible number. This number - // may not be in our sequence, but it is guaranteed to be equal - // to or larger than any other value already inserted. + if (last_val > max_val) { + new_val = std::numeric_limits::max(); + } else { + // Loop until we can correctly update the atomic value + do { + DBUG_ASSERT(last_val > 0); + // Calculate the next value in the auto increment series: offset + // + N * increment where N is 0, 1, 2, ... // - // For now I'm going to take option 2. + // For further information please visit: + // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html // - // Returning ULLONG_MAX from get_auto_increment will cause the SQL - // layer to fail with ER_AUTOINC_READ_FAILED. 
This means that due to - // the SE API for get_auto_increment, inserts will fail with - // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but - // inserts will fail with ER_DUP_ENTRY for other types (or no failure - // if the column is in a non-unique SK). - new_val = std::numeric_limits::max(); - auto_incr = new_val; // Store the largest value into auto_incr - break; - } + // The following is confusing so here is an explanation: + // To get the next number in the sequence above you subtract out the + // offset, calculate the next sequence (N * increment) and then add the + // offset back in. + // + // The additions are rearranged to avoid overflow. The following is + // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact + // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: + // + // (a+b)/c + // = (a - a%c + a%c + b - b%c + b%c) / c + // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c + // = a/c + b/c + (a%c + b%c) / c + // + // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the + // following statement. + ulonglong n = + (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; - new_val = n * inc + off; + // Check if n * inc + off will overflow. This can only happen if we have + // an UNSIGNED BIGINT field. + if (n > (std::numeric_limits::max() - off) / inc) { + DBUG_ASSERT(max_val == std::numeric_limits::max()); + // The 'last_val' value is already equal to or larger than the largest + // value in the sequence. Continuing would wrap around (technically + // the behavior would be undefined). What should we do? + // We could: + // 1) set the new value to the last possible number in our sequence + // as described above. The problem with this is that this + // number could be smaller than a value in an existing row. + // 2) set the new value to the largest possible number. This number + // may not be in our sequence, but it is guaranteed to be equal + // to or larger than any other value already inserted. 
+ // + // For now I'm going to take option 2. + // + // Returning ULLONG_MAX from get_auto_increment will cause the SQL + // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to + // the SE API for get_auto_increment, inserts will fail with + // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but + // inserts will fail with ER_DUP_ENTRY for other types (or no failure + // if the column is in a non-unique SK). + new_val = std::numeric_limits::max(); + auto_incr = new_val; // Store the largest value into auto_incr + break; + } - // Attempt to store the new value (plus 1 since m_auto_incr_val contains - // the next available value) into the atomic value. If the current - // value no longer matches what we have in 'last_val' this will fail and - // we will repeat the loop (`last_val` will automatically get updated - // with the current value). - // - // See above explanation for inc == 1 for why we use std::min. - } while (!auto_incr.compare_exchange_weak(last_val, - std::min(new_val + 1, max_val))); + new_val = n * inc + off; + + // Attempt to store the new value (plus 1 since m_auto_incr_val contains + // the next available value) into the atomic value. If the current + // value no longer matches what we have in 'last_val' this will fail and + // we will repeat the loop (`last_val` will automatically get updated + // with the current value). + // + // See above explanation for inc == 1 for why we use std::min. 
+ } while (!auto_incr.compare_exchange_weak( + last_val, std::min(new_val + 1, max_val))); + } } *first_value = new_val; @@ -12048,13 +12283,13 @@ const char *dbug_print_item(Item *const item) { char *const buf = dbug_item_print_buf; String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); str.length(0); - if (!item) - return "(Item*)nullptr"; + if (!item) return "(Item*)nullptr"; item->print(&str, QT_ORDINARY); - if (str.c_ptr() == buf) + if (str.c_ptr() == buf) { return buf; - else + } else { return "Couldn't fit into buffer"; + } } #endif /*DBUG_OFF*/ @@ -12183,7 +12418,6 @@ bool ha_rocksdb::prepare_inplace_alter_table( ALTER_DROP_UNIQUE_INDEX | ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX | ALTER_ADD_UNIQUE_INDEX)) { - if (has_hidden_pk(altered_table)) { new_n_keys += 1; } @@ -12469,7 +12703,7 @@ int ha_rocksdb::inplace_populate_sk( const int new_packed_size = index->pack_record( new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple, &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0, - nullptr, nullptr, m_ttl_bytes); + nullptr, m_ttl_bytes); const rocksdb::Slice key = rocksdb::Slice( reinterpret_cast(m_sk_packed_tuple), new_packed_size); @@ -12517,9 +12751,9 @@ int ha_rocksdb::inplace_populate_sk( is used inside print_keydup_error so that the error message shows the duplicate record. 
*/ - if (index->unpack_record(new_table_arg, new_table_arg->record[0], - &merge_key, nullptr, - m_verify_row_debug_checksums)) { + if (index->unpack_record( + new_table_arg, new_table_arg->record[0], &merge_key, + &merge_val, m_converter->get_verify_row_debug_checksums())) { /* Should never reach here */ DBUG_ASSERT(0); } @@ -12549,7 +12783,9 @@ int ha_rocksdb::inplace_populate_sk( DBUG_RETURN(res); } - if ((res = tx->finish_bulk_load())) { + bool is_critical_error; + res = tx->finish_bulk_load(&is_critical_error); + if (res && is_critical_error) { // NO_LINT_DEBUG sql_print_error("Error finishing bulk load."); DBUG_RETURN(res); @@ -12758,22 +12994,22 @@ bool ha_rocksdb::commit_inplace_alter_table( #define SHOW_FNAME(name) rocksdb_show_##name -#define DEF_SHOW_FUNC(name, key) \ - static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \ - rocksdb_status_counters.name = \ - rocksdb_stats->getTickerCount(rocksdb::key); \ - var->type = SHOW_LONGLONG; \ - var->value = (char *)&rocksdb_status_counters.name; \ - return HA_EXIT_SUCCESS; \ +#define DEF_SHOW_FUNC(name, key) \ + static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \ + rocksdb_status_counters.name = \ + rocksdb_stats->getTickerCount(rocksdb::key); \ + var->type = SHOW_LONGLONG; \ + var->value = reinterpret_cast(&rocksdb_status_counters.name); \ + return HA_EXIT_SUCCESS; \ } -#define DEF_STATUS_VAR(name) \ +#define DEF_STATUS_VAR(name) \ { "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC } -#define DEF_STATUS_VAR_PTR(name, ptr, option) \ +#define DEF_STATUS_VAR_PTR(name, ptr, option) \ { "rocksdb_" name, (char *)ptr, option } -#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ +#define DEF_STATUS_VAR_FUNC(name, ptr, option) \ { name, reinterpret_cast(ptr), option } struct rocksdb_status_counters_t { @@ -13003,9 +13239,8 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) { var->value = reinterpret_cast(&myrocks_status_variables); } -static ulonglong 
-io_stall_prop_value(const std::map &props, - const std::string &key) { +static ulonglong io_stall_prop_value( + const std::map &props, const std::string &key) { std::map::const_iterator iter = props.find("io_stalls." + key); if (iter != props.end()) { @@ -13183,6 +13418,10 @@ static SHOW_VAR rocksdb_status_vars[] = { SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other, SHOW_LONGLONG), +#ifndef DBUG_OFF + DEF_STATUS_VAR_PTR("num_get_for_update_calls", + &rocksdb_num_get_for_update_calls, SHOW_LONGLONG), +#endif // the variables generated by SHOW_FUNC are sorted only by prefix (first // arg in the tuple below), so make sure it is unique to make sorting // deterministic as quick sort is not stable @@ -13424,6 +13663,49 @@ bool Rdb_manual_compaction_thread::is_manual_compaction_finished(int mc_id) { return finished; } +/** + * Locking read + Not Found + Read Committed occurs if we accessed + * a row by Seek, tried to lock it, failed, released and reacquired the + * snapshot (because of READ COMMITTED mode) and the row was deleted by + * someone else in the meantime. + * If so, we either just skipping the row, or re-creating a snapshot + * and seek again. In both cases, Read Committed constraint is not broken. + */ +bool ha_rocksdb::should_skip_invalidated_record(const int rc) { + if ((m_lock_rows != RDB_LOCK_NONE && rc == HA_ERR_KEY_NOT_FOUND && + my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED)) { + return true; + } + return false; +} +/** + * Indicating snapshot needs to be re-created and retrying seek again, + * instead of returning errors or empty set. This is normally applicable + * when hitting kBusy when locking the first row of the transaction, + * with Repeatable Read isolation level. 
+ */ +bool ha_rocksdb::should_recreate_snapshot(const int rc, + const bool is_new_snapshot) { + if (should_skip_invalidated_record(rc) || + (rc == HA_ERR_ROCKSDB_STATUS_BUSY && is_new_snapshot)) { + return true; + } + return false; +} + +/** + * If calling put/delete/singledelete without locking the row, + * it is necessary to pass assume_tracked=false to RocksDB TX API. + * Read Free Replication and Blind Deletes are the cases when + * using TX API and skipping row locking. + */ +bool ha_rocksdb::can_assume_tracked(THD *thd) { + if (/* MARIAROCKS_NOT_YET use_read_free_rpl() ||*/ (THDVAR(thd, blind_delete_primary_key))) { + return false; + } + return true; +} + bool ha_rocksdb::check_bloom_and_set_bounds( THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, const bool use_all_keys, size_t bound_len, uchar *const lower_bound, @@ -13484,20 +13766,22 @@ bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, shorter require all parts of the key to be available for the short key match. */ - if ((use_all_keys && prefix_extractor->InRange(eq_cond)) - || prefix_extractor->SameResultWhenAppended(eq_cond)) + if ((use_all_keys && prefix_extractor->InRange(eq_cond)) || + prefix_extractor->SameResultWhenAppended(eq_cond)) { can_use = true; - else + } else { can_use = false; + } } else { /* if prefix extractor is not defined, all key parts have to be used by eq_cond. 
*/ - if (use_all_keys) + if (use_all_keys) { can_use = true; - else + } else { can_use = false; + } } return can_use; @@ -13516,7 +13800,7 @@ bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; } bool rdb_is_ttl_read_filtering_enabled() { return rocksdb_enable_ttl_read_filtering; } -#ifndef NDEBUG +#ifndef DBUG_OFF int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; } int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; } int rdb_dbug_set_ttl_read_filter_ts() { @@ -13563,17 +13847,17 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types."); switch (err_type) { - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: - return "RDB_IO_ERROR_TX_COMMIT"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: - return "RDB_IO_ERROR_DICT_COMMIT"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: - return "RDB_IO_ERROR_BG_THREAD"; - case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: - return "RDB_IO_ERROR_GENERAL"; - default: - DBUG_ASSERT(false); - return "(unknown)"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: + return "RDB_IO_ERROR_TX_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: + return "RDB_IO_ERROR_DICT_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: + return "RDB_IO_ERROR_BG_THREAD"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: + return "RDB_IO_ERROR_GENERAL"; + default: + DBUG_ASSERT(false); + return "(unknown)"; } } @@ -13585,32 +13869,38 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { void rdb_handle_io_error(const rocksdb::Status status, const RDB_IO_ERROR_TYPE err_type) { if (status.IsIOError()) { + /* skip dumping core if write failed and we are allowed to do so */ +#ifdef MARIAROCKS_NOT_YET + if (skip_core_dump_on_error) { + opt_core_file = false; + } +#endif switch (err_type) { - case RDB_IO_ERROR_TX_COMMIT: - case RDB_IO_ERROR_DICT_COMMIT: { - rdb_log_status_error(status, "failed 
to write to WAL"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on WAL write error."); - abort(); - break; - } - case RDB_IO_ERROR_BG_THREAD: { - rdb_log_status_error(status, "BG thread failed to write to RocksDB"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on BG write error."); - abort(); - break; - } - case RDB_IO_ERROR_GENERAL: { - rdb_log_status_error(status, "failed on I/O"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on I/O error."); - abort(); - break; - } - default: - DBUG_ASSERT(0); - break; + case RDB_IO_ERROR_TX_COMMIT: + case RDB_IO_ERROR_DICT_COMMIT: { + rdb_log_status_error(status, "failed to write to WAL"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + case RDB_IO_ERROR_BG_THREAD: { + rdb_log_status_error(status, "BG thread failed to write to RocksDB"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on BG write error."); + abort(); + break; + } + case RDB_IO_ERROR_GENERAL: { + rdb_log_status_error(status, "failed on I/O"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on I/O error."); + abort(); + break; + } + default: + DBUG_ASSERT(0); + break; } } else if (status.IsCorruption()) { rdb_log_status_error(status, "data corruption detected!"); @@ -13620,16 +13910,16 @@ void rdb_handle_io_error(const rocksdb::Status status, abort(); } else if (!status.ok()) { switch (err_type) { - case RDB_IO_ERROR_DICT_COMMIT: { - rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); - /* NO_LINT_DEBUG */ - sql_print_error("MyRocks: aborting on WAL write error."); - abort(); - break; - } - default: - rdb_log_status_error(status, "Failed to read/write in RocksDB"); - break; + case RDB_IO_ERROR_DICT_COMMIT: { + rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + default: + rdb_log_status_error(status, 
"Failed to read/write in RocksDB"); + break; } } } @@ -13737,9 +14027,10 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var, if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update delayed_write_rate. " - "status code = %d, status = %s", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update delayed_write_rate. " + "status code = %d, status = %s", + s.code(), s.ToString().c_str()); } } RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); @@ -13797,8 +14088,7 @@ int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) { } else if (new_value_type == MYSQL_VALUE_TYPE_INT) { long long intbuf; value->val_int(value, &intbuf); - if (intbuf > 1) - return 1; + if (intbuf > 1) return 1; *return_value = intbuf > 0 ? TRUE : FALSE; } else { return 1; @@ -13817,12 +14107,14 @@ int rocksdb_check_bulk_load( Rdb_transaction *tx = get_tx_from_thd(thd); if (tx != nullptr) { - const int rc = tx->finish_bulk_load(); - if (rc != 0) { + bool is_critical_error; + const int rc = tx->finish_bulk_load(&is_critical_error); + if (rc != 0 && is_critical_error) { // NO_LINT_DEBUG - sql_print_error("RocksDB: Error %d finalizing last SST file while " - "setting bulk loading variable", - rc); + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "setting bulk loading variable", + rc); THDVAR(thd, bulk_load) = 0; return 1; } @@ -13870,9 +14162,10 @@ static void rocksdb_set_max_background_jobs(THD *thd, if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. " - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13898,9 +14191,10 @@ static void rocksdb_set_bytes_per_sync( if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. 
" - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13926,9 +14220,10 @@ static void rocksdb_set_wal_bytes_per_sync( if (!s.ok()) { /* NO_LINT_DEBUG */ - sql_print_warning("MyRocks: failed to update max_background_jobs. " - "Status code = %d, status = %s.", - s.code(), s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); } } @@ -13955,7 +14250,7 @@ static int rocksdb_validate_set_block_cache_size( } if (new_value < RDB_MIN_BLOCK_CACHE_SIZE || - (uint64_t)new_value > (uint64_t)LONGLONG_MAX) { + (uint64_t)new_value > (uint64_t)LLONG_MAX) { return HA_EXIT_FAILURE; } @@ -13971,17 +14266,19 @@ static int rocksdb_validate_set_block_cache_size( return HA_EXIT_SUCCESS; } -static int -rocksdb_validate_update_cf_options(THD * /* unused */, - struct st_mysql_sys_var * /*unused*/, - void *save, struct st_mysql_value *value) { - +static int rocksdb_validate_update_cf_options( + THD * /* unused */, struct st_mysql_sys_var * /*unused*/, void *save, + struct st_mysql_value *value) { char buff[STRING_BUFFER_USUAL_SIZE]; const char *str; int length; length = sizeof(buff); str = value->val_str(value, buff, &length); - *(const char **)save = str; + // In some cases, str can point to buff in the stack. + // This can cause invalid memory access after validation is finished. + // To avoid this kind case, let's alway duplicate the str if str is not + // nullptr + *(const char **)save = (str == nullptr) ? 
nullptr : my_strdup(str, MYF(0)); if (str == nullptr) { return HA_EXIT_SUCCESS; @@ -13995,13 +14292,17 @@ rocksdb_validate_update_cf_options(THD * /* unused */, my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str); return HA_EXIT_FAILURE; } + // Loop through option_map and create missing column families + for (Rdb_cf_options::Name_to_config_t::iterator it = option_map.begin(); + it != option_map.end(); ++it) { + cf_manager.get_or_create_cf(rdb, it->first); + } return HA_EXIT_SUCCESS; } -static void -rocksdb_set_update_cf_options(THD *const /* unused */, - struct st_mysql_sys_var *const /* unused */, - void *const var_ptr, const void *const save) { +static void rocksdb_set_update_cf_options( + THD *const /* unused */, struct st_mysql_sys_var *const /* unused */, + void *const var_ptr, const void *const save) { const char *const val = *static_cast(save); RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); @@ -14019,7 +14320,7 @@ rocksdb_set_update_cf_options(THD *const /* unused */, // Reset the pointers regardless of how much success we had with updating // the CF options. This will results in consistent behavior and avoids // dealing with cases when only a subset of CF-s was successfully updated. - *reinterpret_cast(var_ptr) = my_strdup(val, MYF(0)); + *reinterpret_cast(var_ptr) = val; // Do the real work of applying the changes. Rdb_cf_options::Name_to_config_t option_map; @@ -14047,9 +14348,10 @@ rocksdb_set_update_cf_options(THD *const /* unused */, if (s != rocksdb::Status::OK()) { // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to convert the options for column " - "family '%s' to a map. %s", cf_name.c_str(), - s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to convert the options for column " + "family '%s' to a map. 
%s", + cf_name.c_str(), s.ToString().c_str()); } else { DBUG_ASSERT(rdb != nullptr); @@ -14058,14 +14360,16 @@ rocksdb_set_update_cf_options(THD *const /* unused */, if (s != rocksdb::Status::OK()) { // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to apply the options for column " - "family '%s'. %s", cf_name.c_str(), - s.ToString().c_str()); + sql_print_warning( + "MyRocks: failed to apply the options for column " + "family '%s'. %s", + cf_name.c_str(), s.ToString().c_str()); } else { // NO_LINT_DEBUG - sql_print_information("MyRocks: options for column family '%s' " - "have been successfully updated.", - cf_name.c_str()); + sql_print_information( + "MyRocks: options for column family '%s' " + "have been successfully updated.", + cf_name.c_str()); // Make sure that data is internally consistent as well and update // the CF options. This is necessary also to make sure that the CF @@ -14128,18 +14432,39 @@ void ha_rocksdb::rpl_after_update_rows() { DBUG_VOID_RETURN; } +#if 0 +bool ha_rocksdb::is_read_free_rpl_table() const { + return table->s && m_tbl_def->m_is_read_free_rpl_table; +} +#endif + /** @brief - Read Free Replication can be used or not. Returning False means - Read Free Replication can be used. Read Free Replication can be used - on UPDATE or DELETE row events, and table must have user defined - primary key. + Read Free Replication can be used or not. Returning true means + Read Free Replication can be used. 
*/ -bool ha_rocksdb::use_read_free_rpl() { +bool ha_rocksdb::use_read_free_rpl() const { DBUG_ENTER_FUNC(); - DBUG_RETURN((m_in_rpl_delete_rows || m_in_rpl_update_rows) && - !has_hidden_pk(table) && m_use_read_free_rpl); + if (!ha_thd()->rli_slave || table->triggers || /* !is_read_free_rpl_table()*/ ) { + DBUG_RETURN(false); + } + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + switch (rocksdb_read_free_rpl) { + case read_free_rpl_type::OFF: + DBUG_RETURN(false); + case read_free_rpl_type::PK_ONLY: + DBUG_RETURN(!has_hidden_pk(table) && table->s->keys == 1); + case read_free_rpl_type::PK_SK: + DBUG_RETURN(!has_hidden_pk(table)); + } +#else + DBUG_RETURN(false); +#endif + + DBUG_ASSERT(false); + DBUG_RETURN(false); } #endif // MARIAROCKS_NOT_YET @@ -14178,7 +14503,7 @@ void sql_print_verbose_info(const char *format, ...) } } -} // namespace myrocks +} // namespace myrocks /** diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index c5a0afef275..cadbd9abfe9 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -50,7 +50,9 @@ #include "rocksdb/utilities/write_batch_with_index.h" /* MyRocks header files */ +#include "./rdb_buff.h" #include "./rdb_comparator.h" +#include "./rdb_global.h" #include "./rdb_index_merge.h" #include "./rdb_io_watchdog.h" #include "./rdb_perf_context.h" @@ -58,6 +60,8 @@ #include "./rdb_utils.h" #include "./ut0counter.h" +#include "rdb_mariadb_server_port.h" + class Field_blob; class Field_varstring; /** @@ -69,181 +73,13 @@ class Field_varstring; namespace myrocks { -/* - * class for exporting transaction information for - * information_schema.rocksdb_trx - */ -struct Rdb_trx_info { - std::string name; - ulonglong trx_id; - ulonglong write_count; - ulonglong lock_count; - int timeout_sec; - std::string state; - std::string waiting_key; - ulonglong waiting_cf_id; - int is_replication; - int skip_trx_api; - int read_only; - int deadlock_detect; - int num_ongoing_bulk_load; 
- ulong thread_id; - std::string query_str; -}; - -std::vector rdb_get_all_trx_info(); - -/* - * class for exporting deadlock transaction information for - * information_schema.rocksdb_deadlock - */ -struct Rdb_deadlock_info { - struct Rdb_dl_trx_info { - ulonglong trx_id; - std::string cf_name; - std::string waiting_key; - bool exclusive_lock; - std::string index_name; - std::string table_name; - }; - std::vector path; - int64_t deadlock_time; - ulonglong victim_trx_id; -}; - -std::vector rdb_get_deadlock_info(); - -/* - This is - - the name of the default Column Family (the CF which stores indexes which - didn't explicitly specify which CF they are in) - - the name used to set the default column family parameter for per-cf - arguments. -*/ -extern const std::string DEFAULT_CF_NAME; - -/* - This is the name of the Column Family used for storing the data dictionary. -*/ -extern const std::string DEFAULT_SYSTEM_CF_NAME; - -/* - This is the name of the hidden primary key for tables with no pk. -*/ -const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID"; - -/* - Column family name which means "put this index into its own column family". - DEPRECATED!!! -*/ -extern const std::string PER_INDEX_CF_NAME; - -/* - Name for the background thread. -*/ -const char *const BG_THREAD_NAME = "myrocks-bg"; - -/* - Name for the drop index thread. -*/ -const char *const INDEX_THREAD_NAME = "myrocks-index"; - -/* - Name for the manual compaction thread. -*/ -const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc"; - -/* - Separator between partition name and the qualifier. Sample usage: - - - p0_cfname=foo - - p3_tts_col=bar -*/ -const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_'; - -/* - Separator between qualifier name and value. Sample usage: - - - p0_cfname=foo - - p3_tts_col=bar -*/ -const char RDB_QUALIFIER_VALUE_SEP = '='; - -/* - Separator between multiple qualifier assignments. 
Sample usage: - - - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz -*/ -const char RDB_QUALIFIER_SEP = ';'; - -/* - Qualifier name for a custom per partition column family. -*/ -const char *const RDB_CF_NAME_QUALIFIER = "cfname"; - -/* - Qualifier name for a custom per partition ttl duration. -*/ -const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration"; - -/* - Qualifier name for a custom per partition ttl duration. -*/ -const char *const RDB_TTL_COL_QUALIFIER = "ttl_col"; - -/* - Default, minimal valid, and maximum valid sampling rate values when collecting - statistics about table. -*/ -#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10 -#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1 -#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100 - -/* - Default and maximum values for rocksdb-compaction-sequential-deletes and - rocksdb-compaction-sequential-deletes-window to add basic boundary checking. -*/ -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000 - -#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0 -#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000 - -/* - Default and maximum values for various compaction and flushing related - options. Numbers are based on the hardware we currently use and our internal - benchmarks which indicate that parallelization helps with the speed of - compactions. - - Ideally of course we'll use heuristic technique to determine the number of - CPU-s and derive the values from there. This however has its own set of - problems and we'll choose simplicity for now. -*/ -#define MAX_BACKGROUND_JOBS 64 - -#define DEFAULT_SUBCOMPACTIONS 1 -#define MAX_SUBCOMPACTIONS 64 - -/* - Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled). -*/ -#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0 - -/* - Defines the field sizes for serializing XID object to a string representation. - string byte format: [field_size: field_value, ...] 
- [ - 8: XID.formatID, - 1: XID.gtrid_length, - 1: XID.bqual_length, - XID.gtrid_length + XID.bqual_length: XID.data - ] -*/ -#define RDB_FORMATID_SZ 8 -#define RDB_GTRID_SZ 1 -#define RDB_BQUAL_SZ 1 -#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ) - +class Rdb_converter; +class Rdb_key_def; +class Rdb_tbl_def; +class Rdb_transaction; +class Rdb_transaction_impl; +class Rdb_writebatch_impl; +class Rdb_field_encoder; /* collations, used in MariaRocks */ enum collations_used { COLLATION_UTF8MB4_BIN = 46, @@ -255,74 +91,13 @@ enum collations_used { COLLATION_UTF8_BIN = 83 }; -/* - To fix an unhandled exception we specify the upper bound as LONGLONGMAX - instead of ULONGLONGMAX because the latter is -1 and causes an exception when - cast to jlong (signed) of JNI - - The reason behind the cast issue is the lack of unsigned int support in Java. -*/ -#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast(LONGLONG_MAX) - -/* - Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes). - static_assert() in code will validate this assumption. -*/ -#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong) - -/* - Bytes used to store TTL, in the beginning of all records for tables with TTL - enabled. -*/ -#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong) - -#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong) - -/* - Maximum index prefix length in bytes. -*/ -#define MAX_INDEX_COL_LEN_LARGE 3072 -#define MAX_INDEX_COL_LEN_SMALL 767 - -/* - MyRocks specific error codes. NB! Please make sure that you will update - HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in - rdb_error_messages to include any new error messages. 
-*/ -#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1) -#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0) -#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \ - (HA_ERR_ROCKSDB_FIRST + 1) -#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \ - (HA_ERR_ROCKSDB_FIRST + 2) -#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3) -#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4) -#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5) -#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6) -#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7) -#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8) -#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9) -/* - Each error code below maps to a RocksDB status code found in: - rocksdb/include/rocksdb/status.h -*/ -#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10) -#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11) -#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12) -#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13) -#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14) -#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15) -#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16) -#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17) -#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18) -#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19) -#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20) -#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21) -#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22) -#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23) -#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24) -#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25) -#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +extern char 
*rocksdb_read_free_rpl_tables; +#if defined(HAVE_PSI_INTERFACE) +extern PSI_rwlock_key key_rwlock_read_free_rpl_tables; +#endif +extern Regex_list_handler rdb_read_free_regex_handler; +#endif /** @brief @@ -338,7 +113,7 @@ struct Rdb_table_handler { atomic_stat m_lock_wait_timeout_counter; atomic_stat m_deadlock_counter; - my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock + my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock /* Stores cumulative table statistics */ my_io_perf_atomic_t m_io_perf_read; @@ -352,138 +127,19 @@ struct Rdb_table_handler { uint64_t m_mtcache_last_update; }; -class Rdb_key_def; -class Rdb_tbl_def; -class Rdb_transaction; -class Rdb_transaction_impl; -class Rdb_writebatch_impl; -class Rdb_field_encoder; - -const char *const rocksdb_hton_name = "ROCKSDB"; - -typedef struct _gl_index_id_s { - uint32_t cf_id; - uint32_t index_id; - bool operator==(const struct _gl_index_id_s &other) const { - return cf_id == other.cf_id && index_id == other.index_id; - } - bool operator!=(const struct _gl_index_id_s &other) const { - return cf_id != other.cf_id || index_id != other.index_id; - } - bool operator<(const struct _gl_index_id_s &other) const { - return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id < other.index_id); - } - bool operator<=(const struct _gl_index_id_s &other) const { - return cf_id < other.cf_id || - (cf_id == other.cf_id && index_id <= other.index_id); - } - bool operator>(const struct _gl_index_id_s &other) const { - return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id > other.index_id); - } - bool operator>=(const struct _gl_index_id_s &other) const { - return cf_id > other.cf_id || - (cf_id == other.cf_id && index_id >= other.index_id); - } -} GL_INDEX_ID; - -enum operation_type : int { - ROWS_DELETED = 0, - ROWS_INSERTED, - ROWS_READ, - ROWS_UPDATED, - ROWS_DELETED_BLIND, - ROWS_EXPIRED, - ROWS_FILTERED, - ROWS_HIDDEN_NO_SNAPSHOT, - ROWS_MAX -}; - -enum query_type : int { 
QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX }; - -#if defined(HAVE_SCHED_GETCPU) -#define RDB_INDEXER get_sched_indexer_t -#else -#define RDB_INDEXER thread_id_indexer_t -#endif - -/* Global statistics struct used inside MyRocks */ -struct st_global_stats { - ib_counter_t rows[ROWS_MAX]; - - // system_rows_ stats are only for system - // tables. They are not counted in rows_* stats. - ib_counter_t system_rows[ROWS_MAX]; - - ib_counter_t queries[QUERIES_MAX]; - - ib_counter_t covered_secondary_key_lookups; -}; - -/* Struct used for exporting status to MySQL */ -struct st_export_stats { - ulonglong rows_deleted; - ulonglong rows_inserted; - ulonglong rows_read; - ulonglong rows_updated; - ulonglong rows_deleted_blind; - ulonglong rows_expired; - ulonglong rows_filtered; - ulonglong rows_hidden_no_snapshot; - - ulonglong system_rows_deleted; - ulonglong system_rows_inserted; - ulonglong system_rows_read; - ulonglong system_rows_updated; - - ulonglong queries_point; - ulonglong queries_range; - - ulonglong covered_secondary_key_lookups; -}; - -/* Struct used for exporting RocksDB memory status */ -struct st_memory_stats { - ulonglong memtable_total; - ulonglong memtable_unflushed; -}; - -/* Struct used for exporting RocksDB IO stalls stats */ -struct st_io_stall_stats { - ulonglong level0_slowdown; - ulonglong level0_slowdown_with_compaction; - ulonglong level0_numfiles; - ulonglong level0_numfiles_with_compaction; - ulonglong stop_for_pending_compaction_bytes; - ulonglong slowdown_for_pending_compaction_bytes; - ulonglong memtable_compaction; - ulonglong memtable_slowdown; - ulonglong total_stop; - ulonglong total_slowdown; - - st_io_stall_stats() - : level0_slowdown(0), level0_slowdown_with_compaction(0), - level0_numfiles(0), level0_numfiles_with_compaction(0), - stop_for_pending_compaction_bytes(0), - slowdown_for_pending_compaction_bytes(0), memtable_compaction(0), - memtable_slowdown(0), total_stop(0), total_slowdown(0) {} -}; - -} // namespace myrocks - 
-#include "./rdb_buff.h" +} // namespace myrocks /* Provide hash function for GL_INDEX_ID so we can include it in sets */ namespace std { -template <> struct hash { +template <> +struct hash { std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const { const uint64_t val = ((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id); return std::hash()(val); } }; -} // namespace std +} // namespace std namespace myrocks { @@ -493,9 +149,9 @@ namespace myrocks { */ class ha_rocksdb : public my_core::handler { - my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock + my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock - Rdb_table_handler *m_table_handler; ///< Open table handler + Rdb_table_handler *m_table_handler; ///< Open table handler /* Iterator used for range scans and for full table/index scans */ rocksdb::Iterator *m_scan_it; @@ -536,11 +192,6 @@ class ha_rocksdb : public my_core::handler { */ mutable bool m_pk_can_be_decoded; - /* - TRUE <=> Some fields in the PK may require unpack_info. - */ - bool m_maybe_unpack_info; - uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */ uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */ // ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey? @@ -584,10 +235,13 @@ class ha_rocksdb : public my_core::handler { */ uchar *m_pack_buffer; + /* class to convert between Mysql format and RocksDB format*/ + std::shared_ptr m_converter; + /* Pointer to the original TTL timestamp value (8 bytes) during UPDATE. */ - char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD]; + char *m_ttl_bytes; /* The TTL timestamp value can change if the explicit TTL column is updated. If we detect this when updating the PK, we indicate it here so @@ -598,9 +252,6 @@ class ha_rocksdb : public my_core::handler { /* rowkey of the last record we've read, in StorageFormat. 
*/ String m_last_rowkey; - /* Buffer used by convert_record_to_storage_format() */ - String m_storage_record; - /* Last retrieved record, in table->record[0] data format. @@ -620,15 +271,28 @@ class ha_rocksdb : public my_core::handler { /* TRUE means we are accessing the first row after a snapshot was created */ bool m_rnd_scan_is_new_snapshot; - /* TRUE means the replication slave will use Read Free Replication */ - bool m_use_read_free_rpl; - /* TRUE means we should skip unique key checks for this table if the replication lag gets too large */ bool m_skip_unique_check; + /* + TRUE means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by + remember the failed attempt (if there is one that violates uniqueness check) + in write_row and in the following index_read to skip the lock check and read + entirely + */ + bool m_insert_with_update; + + /* TRUE if last time the insertion failed due to duplicated PK */ + bool m_dup_pk_found; + +#ifndef DBUG_OFF + /* Last retreived record for sanity checking */ + String m_dup_pk_retrieved_record; +#endif + /** @brief This is a bitmap of indexes (i.e. 
a set) whose keys (in future, values) may @@ -645,7 +309,7 @@ class ha_rocksdb : public my_core::handler { /* MySQL index number for duplicate key error */ - int m_dupp_errkey; + uint m_dupp_errkey; int create_key_defs(const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, @@ -678,11 +342,10 @@ class ha_rocksdb : public my_core::handler { MY_ATTRIBUTE((__nonnull__)); void release_scan_iterator(void); - rocksdb::Status - get_for_update(Rdb_transaction *const tx, - rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *value) const; + rocksdb::Status get_for_update( + Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *value) const; int get_row_by_rowid(uchar *const buf, const char *const rowid, const uint rowid_size, const bool skip_lookup = false, @@ -707,7 +370,7 @@ class ha_rocksdb : public my_core::handler { int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - bool can_use_single_delete(const uint &index) const + bool can_use_single_delete(const uint index) const MY_ATTRIBUTE((__warn_unused_result__)); bool is_blind_delete_enabled(); bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__)); @@ -724,39 +387,6 @@ class ha_rocksdb : public my_core::handler { void set_last_rowkey(const uchar *const old_data); - /* - Array of table->s->fields elements telling how to store fields in the - record. 
- */ - Rdb_field_encoder *m_encoder_arr; - - /* Describes instructions on how to decode the field */ - class READ_FIELD { - public: - /* Points to Rdb_field_encoder describing the field */ - Rdb_field_encoder *m_field_enc; - /* if true, decode the field, otherwise skip it */ - bool m_decode; - /* Skip this many bytes before reading (or skipping) this field */ - int m_skip; - }; - - /* - This tells which table fields should be decoded (or skipped) when - decoding table row from (pk, encoded_row) pair. (Secondary keys are - just always decoded in full currently) - */ - std::vector m_decoders_vect; - - /* - This tells if any field which is part of the key needs to be unpacked and - decoded. - */ - bool m_key_requested = false; - - /* Setup field_decoders based on type of scan and table->read_set */ - void setup_read_decoders(); - /* For the active index, indicates which columns must be covered for the current lookup to be covered. If the bitmap field is null, that means this @@ -764,14 +394,6 @@ class ha_rocksdb : public my_core::handler { */ MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0}; - /* - Number of bytes in on-disk (storage) record format that are used for - storing SQL NULL flags. - */ - uint m_null_bytes_in_rec; - - void get_storage_type(Rdb_field_encoder *const encoder, const uint &kp); - void setup_field_converters(); int alloc_key_buffers(const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, bool alloc_alter_buffers = false) @@ -779,25 +401,19 @@ class ha_rocksdb : public my_core::handler { void free_key_buffers(); // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE - rocksdb::Range get_range(const int &i, uchar buf[]) const; + rocksdb::Range get_range(const int i, uchar buf[]) const; /* Perf timers for data reads */ Rdb_io_perf m_io_perf; - /* - A counter of how many row checksums were checked for this table. Note that - this does not include checksums for secondary index entries. 
- */ - my_core::ha_rows m_row_checksums_checked; - /* Update stats */ void update_stats(void); -public: + public: /* The following two are currently only used for getting the range bounds from QUICK_SELECT_DESC. @@ -816,19 +432,19 @@ public: */ bool m_store_row_debug_checksums; - /* Same as above but for verifying checksums when reading */ - bool m_verify_row_debug_checksums; int m_checksums_pct; ha_rocksdb(my_core::handlerton *const hton, my_core::TABLE_SHARE *const table_arg); - ~ha_rocksdb() { + virtual ~ha_rocksdb() override { int err MY_ATTRIBUTE((__unused__)); err = finalize_bulk_load(false); if (err != 0) { - sql_print_error("RocksDB: Error %d finalizing bulk load while closing " - "handler.", - err); + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Error %d finalizing bulk load while closing " + "handler.", + err); } } @@ -898,21 +514,6 @@ public: int rename_table(const char *const from, const char *const to) override MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_blob_from_storage_format(my_core::Field_blob *const blob, - Rdb_string_reader *const reader, - bool decode) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - - int convert_varchar_from_storage_format( - my_core::Field_varstring *const field_var, - Rdb_string_reader *const reader, bool decode) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - - int convert_field_from_storage_format(my_core::Field *const field, - Rdb_string_reader *const reader, - bool decode, uint len) - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_from_storage_format(const rocksdb::Slice *const key, const rocksdb::Slice *const value, uchar *const buf) @@ -925,10 +526,9 @@ public: static const std::vector parse_into_tokens(const std::string &s, const char delim); - static const std::string generate_cf_name(const uint index, - const TABLE *const table_arg, - const Rdb_tbl_def *const tbl_def_arg, - bool *per_part_match_found); + static const std::string generate_cf_name( 
+ const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found); static const char *get_key_name(const uint index, const TABLE *const table_arg, @@ -1067,7 +667,7 @@ public: /* Default implementation from cancel_pushed_idx_cond() suits us */ -private: + private: struct key_def_cf_info { rocksdb::ColumnFamilyHandle *cf_handle; bool is_reverse_cf; @@ -1087,16 +687,6 @@ private: longlong hidden_pk_id; bool skip_unique_check; - - // In certain cases, TTL is enabled on a table, as well as an explicit TTL - // column. The TTL column can be part of either the key or the value part - // of the record. If it is part of the key, we store the offset here. - // - // Later on, we use this offset to store the TTL in the value part of the - // record, which we can then access in the compaction filter. - // - // Set to UINT_MAX by default to indicate that the TTL is not in key. - uint ttl_pk_offset = UINT_MAX; }; /* @@ -1129,23 +719,24 @@ private: std::array *const cfs) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int create_key_def(const TABLE *const table_arg, const uint &i, + int create_key_def(const TABLE *const table_arg, const uint i, const Rdb_tbl_def *const tbl_def_arg, std::shared_ptr *const new_key_def, - const struct key_def_cf_info &cf_info) const + const struct key_def_cf_info &cf_info, uint64 ttl_duration, + const std::string &ttl_column) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int create_inplace_key_defs( const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg, const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, - const std::array &cfs) const + const std::array &cf, + uint64 ttl_duration, const std::string &ttl_column) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - std::unordered_map - get_old_key_positions(const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, - const TABLE *old_table_arg, - const Rdb_tbl_def *old_tbl_def_arg) const + 
std::unordered_map get_old_key_positions( + const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg) const MY_ATTRIBUTE((__nonnull__)); int compare_key_parts(const KEY *const old_key, @@ -1155,24 +746,20 @@ private: int compare_keys(const KEY *const old_key, const KEY *const new_key) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int convert_record_to_storage_format(const struct update_row_info &row_info, - rocksdb::Slice *const packed_rec) - MY_ATTRIBUTE((__nonnull__)); - bool should_hide_ttl_rec(const Rdb_key_def &kd, const rocksdb::Slice &ttl_rec_val, const int64_t curr_ts) MY_ATTRIBUTE((__warn_unused_result__)); - void rocksdb_skip_expired_records(const Rdb_key_def &kd, - rocksdb::Iterator *const iter, - bool seek_backward); + int rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward); int index_first_intern(uchar *buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int index_last_intern(uchar *buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int find_icp_matching_index_rec(const bool &move_forward, uchar *const buf) + int find_icp_matching_index_rec(const bool move_forward, uchar *const buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void calc_updated_indexes(); @@ -1180,20 +767,20 @@ private: const bool skip_unique_check) MY_ATTRIBUTE((__warn_unused_result__)); int get_pk_for_update(struct update_row_info *const row_info); - int check_and_lock_unique_pk(const uint &key_id, + int check_and_lock_unique_pk(const uint key_id, const struct update_row_info &row_info, - bool *const found, bool *const pk_changed) + bool *const found) MY_ATTRIBUTE((__warn_unused_result__)); - int check_and_lock_sk(const uint &key_id, + int check_and_lock_sk(const uint key_id, const struct update_row_info &row_info, bool *const found) MY_ATTRIBUTE((__warn_unused_result__)); int check_uniqueness_and_lock(const struct 
update_row_info &row_info, - bool *const pk_changed) + bool pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); bool over_bulk_load_threshold(int *err) MY_ATTRIBUTE((__warn_unused_result__)); - int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &index, + int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &key_def, const rocksdb::Slice *key, struct unique_sk_buf_info *sk_info) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -1202,32 +789,36 @@ private: bool sort) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void update_bytes_written(ulonglong bytes_written); - int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info, - const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); - int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, - const struct update_row_info &row_info, const bool bulk_load_sk) + int update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); - int update_indexes(const struct update_row_info &row_info, - const bool &pk_changed) + int update_write_sk(const TABLE *const table_arg, const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) + MY_ATTRIBUTE((__warn_unused_result__)); + int update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) MY_ATTRIBUTE((__warn_unused_result__)); int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, - const bool &using_full_key, - const rocksdb::Slice &key_slice, + const bool using_full_key, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int read_before_key(const Rdb_key_def &kd, const bool &using_full_key, + int read_before_key(const Rdb_key_def &kd, const bool using_full_key, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int 
read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice, const int64_t ttl_filter_ts) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); - int position_to_correct_key( - const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, - const bool &full_key_match, const uchar *const key, - const key_part_map &keypart_map, const rocksdb::Slice &key_slice, - bool *const move_forward, const int64_t ttl_filter_ts) + int position_to_correct_key(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const bool full_key_match, const uchar *const key, + const key_part_map &keypart_map, + const rocksdb::Slice &key_slice, + bool *const move_forward, + const int64_t ttl_filter_ts) MY_ATTRIBUTE((__warn_unused_result__)); int read_row_from_primary_key(uchar *const buf) @@ -1239,7 +830,7 @@ private: int calc_eq_cond_len(const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, const rocksdb::Slice &slice, - const int &bytes_changed_by_succ, + const int bytes_changed_by_succ, const key_range *const end_key, uint *const end_key_packed_size) MY_ATTRIBUTE((__warn_unused_result__)); @@ -1261,6 +852,10 @@ private: int calculate_stats_for_table() MY_ATTRIBUTE((__warn_unused_result__)); + bool should_skip_invalidated_record(const int rc); + bool should_recreate_snapshot(const int rc, const bool is_new_snapshot); + bool can_assume_tracked(THD *thd); + public: int index_init(uint idx, bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1315,11 +910,15 @@ private: ha_rows records_in_range(uint inx, key_range *const min_key, key_range *const max_key) override MY_ATTRIBUTE((__warn_unused_result__)); + + int delete_table(Rdb_tbl_def *const tbl); int delete_table(const char *const from) override MY_ATTRIBUTE((__warn_unused_result__)); int create(const char *const name, TABLE *const form, HA_CREATE_INFO *const create_info) override MY_ATTRIBUTE((__warn_unused_result__)); + int create_table(const std::string &table_name, const TABLE *table_arg, + 
ulonglong auto_increment_value); bool check_if_incompatible_data(HA_CREATE_INFO *const info, uint table_changes) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1367,26 +966,25 @@ private: TABLE *const altered_table, my_core::Alter_inplace_info *const ha_alter_info) override; - bool - commit_inplace_alter_table(TABLE *const altered_table, - my_core::Alter_inplace_info *const ha_alter_info, - bool commit) override; + bool commit_inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info, bool commit) override; -#ifdef MARIAROCKS_NOT_YET // MDEV-10976 - void set_use_read_free_rpl(const char *const whitelist); -#endif void set_skip_unique_check_tables(const char *const whitelist); +#ifdef MARIAROCKS_NOT_YET // MDEV-10976 + bool is_read_free_rpl_table() const; +#endif #ifdef MARIAROCKS_NOT_YET // MDEV-10976 -public: + public: virtual void rpl_before_delete_rows() override; virtual void rpl_after_delete_rows() override; virtual void rpl_before_update_rows() override; virtual void rpl_after_update_rows() override; - virtual bool use_read_free_rpl() override; + virtual bool use_read_free_rpl() const override; #endif // MARIAROCKS_NOT_YET -private: + private: /* Flags tracking if we are inside different replication operation */ bool m_in_rpl_delete_rows; bool m_in_rpl_update_rows; @@ -1436,16 +1034,21 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { std::unordered_set> added_indexes, std::unordered_set dropped_index_ids, uint n_added_keys, uint n_dropped_keys, ulonglong max_auto_incr) - : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), - m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), - m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), - m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), - m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys), + : my_core::inplace_alter_handler_ctx(), + m_new_tdef(new_tdef), + 
m_old_key_descr(old_key_descr), + m_new_key_descr(new_key_descr), + m_old_n_keys(old_n_keys), + m_new_n_keys(new_n_keys), + m_added_indexes(added_indexes), + m_dropped_index_ids(dropped_index_ids), + m_n_added_keys(n_added_keys), + m_n_dropped_keys(n_dropped_keys), m_max_auto_incr(max_auto_incr) {} ~Rdb_inplace_alter_ctx() {} -private: + private: /* Disable Copying */ Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &); Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); @@ -1460,5 +1063,5 @@ extern bool prevent_myrocks_loading; void sql_print_verbose_info(const char *format, ...); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h index 26417328194..03d24957a23 100644 --- a/storage/rocksdb/ha_rocksdb_proto.h +++ b/storage/rocksdb/ha_rocksdb_proto.h @@ -78,7 +78,7 @@ Rdb_cf_manager &rdb_get_cf_manager(); const rocksdb::BlockBasedTableOptions &rdb_get_table_options(); bool rdb_is_ttl_enabled(); bool rdb_is_ttl_read_filtering_enabled(); -#ifndef NDEBUG +#ifndef DBUG_OFF int rdb_dbug_set_ttl_rec_ts(); int rdb_dbug_set_ttl_snapshot_ts(); int rdb_dbug_set_ttl_read_filter_ts(); @@ -100,4 +100,4 @@ Rdb_ddl_manager *rdb_get_ddl_manager(void) class Rdb_binlog_manager; Rdb_binlog_manager *rdb_get_binlog_manager(void) MY_ATTRIBUTE((__warn_unused_result__)); -} // namespace myrocks +} // namespace myrocks diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h index b2820127711..8902bc18893 100644 --- a/storage/rocksdb/logger.h +++ b/storage/rocksdb/logger.h @@ -22,7 +22,7 @@ namespace myrocks { class Rdb_logger : public rocksdb::Logger { -public: + public: explicit Rdb_logger(const rocksdb::InfoLogLevel log_level = rocksdb::InfoLogLevel::ERROR_LEVEL) : m_mysql_log_level(log_level) {} @@ -77,9 +77,9 @@ public: m_mysql_log_level = log_level; } -private: + private: std::shared_ptr m_logger; rocksdb::InfoLogLevel m_mysql_log_level; }; -} // namespace myrocks +} // namespace 
myrocks diff --git a/storage/rocksdb/myrocks_hotbackup.py b/storage/rocksdb/myrocks_hotbackup.py index 906ba814776..42c25c95c1c 100755 --- a/storage/rocksdb/myrocks_hotbackup.py +++ b/storage/rocksdb/myrocks_hotbackup.py @@ -44,12 +44,14 @@ class Writer(object): class StreamWriter(Writer): stream_cmd= '' - def __init__(self, stream_option): + def __init__(self, stream_option, direct = 0): super(StreamWriter, self).__init__() if stream_option == 'tar': self.stream_cmd= 'tar chf -' elif stream_option == 'xbstream': self.stream_cmd= 'xbstream -c' + if direct: + self.stream_cmd = self.stream_cmd + ' -d' else: raise Exception("Only tar or xbstream is supported as streaming option.") @@ -341,6 +343,13 @@ class MySQLUtil: row = cur.fetchone() return row[0] + @staticmethod + def is_directio_enabled(dbh): + sql = "SELECT @@global.rocksdb_use_direct_reads" + cur = dbh.cursor() + cur.execute(sql) + row = cur.fetchone() + return row[0] class BackupRunner: datadir = None @@ -362,9 +371,7 @@ class BackupRunner: try: signal.signal(signal.SIGINT, signal_handler) w = None - if opts.output_stream: - w = StreamWriter(opts.output_stream) - else: + if not opts.output_stream: raise Exception("Currently only streaming backup is supported.") snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round) @@ -372,6 +379,11 @@ class BackupRunner: opts.mysql_password, opts.mysql_port, opts.mysql_socket) + direct = MySQLUtil.is_directio_enabled(dbh) + logger.info("Direct I/O: %d", direct) + + w = StreamWriter(opts.output_stream, direct) + if not self.datadir: self.datadir = MySQLUtil.get_datadir(dbh) logger.info("Set datadir: %s", self.datadir) diff --git a/storage/rocksdb/mysql-test/rocksdb/combinations b/storage/rocksdb/mysql-test/rocksdb/combinations index d49de3d38b3..be8080d4b9b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/combinations +++ b/storage/rocksdb/mysql-test/rocksdb/combinations @@ -3,4 +3,3 @@ loose-rocksdb_write_policy=write_committed [write_prepared] 
loose-rocksdb_write_policy=write_prepared -loose-rocksdb_commit_time_batch_for_recovery=on diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc index 8ec97510dbd..29ec94188a2 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc @@ -89,20 +89,32 @@ EOF # Make sure a snapshot held by another user doesn't block the bulk load connect (other,localhost,root,,); set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; connection default; + +# Update CF to smaller value to create multiple SST in ingestion +eval SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= + '$pk_cf_name={write_buffer_size=8m;target_file_size_base=1m};'; + set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; --disable_query_log --echo LOAD DATA INFILE INTO TABLE t1; eval LOAD DATA INFILE '$file' INTO TABLE t1; +# There should be no SST being ingested +select * from t1; --echo LOAD DATA INFILE INTO TABLE t2; eval LOAD DATA INFILE '$file' INTO TABLE t2; +# There should be no SST being ingested +select * from t2; --echo LOAD DATA INFILE INTO TABLE t3; eval LOAD DATA INFILE '$file' INTO TABLE t3; +# There should be no SST being ingested +select * from t3; --enable_query_log set rocksdb_bulk_load=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc index 5f808087e3e..46aea8f23b5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc +++ 
b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -100,9 +100,10 @@ EOF # Make sure a snapshot held by another user doesn't block the bulk load connect (other,localhost,root,,); set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; connection default; set rocksdb_bulk_load=1; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc new file mode 100644 index 00000000000..233635b369e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc @@ -0,0 +1,298 @@ +CREATE TABLE `link_table` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , + KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +CREATE TABLE `link_table2` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) 
unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) + COMMENT 'cf_link' , + KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; + +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 
125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); + +insert 
into link_table2 select * from link_table; + +CREATE TABLE `id_table` ( + `id` bigint(20) NOT NULL DEFAULT '0', + `type` int(11) NOT NULL DEFAULT '0', + `row_created_time` int(11) NOT NULL DEFAULT '0', + `hash_key` varchar(255) NOT NULL DEFAULT '', + `is_deleted` tinyint(4) DEFAULT '0', + PRIMARY KEY (`id`), + KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; + +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); + +CREATE TABLE `node_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + `update_time` int(10) unsigned NOT NULL DEFAULT '0', + `data` mediumtext COLLATE latin1_bin NOT NULL, + PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', + KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into node_table values (1, 1, 1, 10, 'data'); + +insert into node_table values (2, 1, 1, 10, 'data'); + +insert into node_table values (3, 1, 1, 10, 'data'); + +insert into node_table values (4, 1, 1, 10, 'data'); + +insert into node_table values (5, 1, 1, 10, 'data'); + +insert into node_table values (6, 1, 1, 10, 'data'); + +insert into node_table values (7, 1, 1, 10, 'data'); + +insert into node_table values (8, 1, 1, 10, 'data'); + +insert into node_table values (9, 1, 1, 10, 'data'); + +insert into node_table values (10, 1, 1, 10, 'data'); + +CREATE TABLE 
`count_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `count` int(10) unsigned NOT NULL DEFAULT '0', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + + +insert into count_table values (2, 1, 1, 1, 10, 20); + +insert into count_table values (3, 1, 1, 1, 10, 20); + +insert into count_table values (4, 1, 1, 1, 10, 20); + +insert into count_table values (5, 1, 1, 1, 10, 20); + +insert into count_table values (6, 1, 1, 1, 10, 20); + +insert into count_table values (7, 1, 1, 1, 10, 20); + +insert into count_table values (8, 1, 1, 1, 10, 20); + +insert into count_table values (9, 1, 1, 1, 10, 20); + +insert into count_table values (10, 1, 1, 1, 10, 20); + +CREATE TABLE `link_table5` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert 
into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + + +CREATE TABLE `link_table3` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + +CREATE TABLE `link_table6` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, + `data`(255)) COMMENT 
'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table6 values (1, 1, 2, 2, 1, 1, + 'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, + 'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, + 'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, + 'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, + 'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, + 'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, + 'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); + +CREATE TABLE `link_table4` ( + `id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', + `raw_key` text COLLATE latin1_bin, + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into 
link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); diff --git a/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc new file mode 100644 index 00000000000..79ac367a73b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc @@ -0,0 +1,1425 @@ +# +# Test file for WL#1724 (Min/Max Optimization for Queries with Group By Clause). +# The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT. +# + +# +# TODO: +# Add queries with: +# - C != const +# - C IS NOT NULL +# - HAVING clause + +--disable_warnings +drop table if exists t1; +--enable_warnings + +eval create table t1 ( + a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' ' +) engine=$engine; + +insert into t1 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), 
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'), +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), 
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'), +('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'), +('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'), +('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'), +('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'); + +create index idx_t1_0 on t1 (a1); +create index idx_t1_1 on t1 (a1,a2,b,c); +create index idx_t1_2 on t1 (a1,a2,b); +analyze table t1; + +# t2 is the same as t1, but with some NULLs in the MIN/MAX column, and +# one more nullable attribute + +--disable_warnings +drop table if exists t2; +--enable_warnings + +eval create table t2 ( + a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' ' +) engine=$engine; +insert into t2 select * from t1; +# add few rows with NULL's in the MIN/MAX column +insert into t2 (a1, a2, b, c, d) values +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), +('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'), +('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'), +('a','a','a',NULL,'xyz'), +('a','a','b',NULL,'xyz'), +('a','b','a',NULL,'xyz'), +('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'), 
+('d','b','b',NULL,'xyz'), +('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'), +('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'); + +create index idx_t2_0 on t2 (a1); +create index idx_t2_1 on t2 (a1,a2,b,c); +create index idx_t2_2 on t2 (a1,a2,b); +analyze table t2; + +# Table t3 is the same as t1, but with smaller column lenghts. +# This allows to test different branches of the cost computation procedure +# when the number of keys per block are less than the number of keys in the +# sub-groups formed by predicates over non-group attributes. + +--disable_warnings +drop table if exists t3; +--enable_warnings + +eval create table t3 ( + a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' ' +) engine=$engine; + +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), 
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), 
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), +('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); +insert into t3 (a1, a2, b, c, d) values +('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'), +('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'), +('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'), +('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'), +('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'), +('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'), +('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'), +('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'), 
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'), +('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'), +('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'), +('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'); + +create index idx_t3_0 on t3 (a1); +create index idx_t3_1 on t3 (a1,a2,b,c); +create index idx_t3_2 on t3 (a1,a2,b); +analyze table t3; + + +# +# Queries without a WHERE clause. These queries do not use ranges. +# + +# plans +explain select a1, min(a2) from t1 group by a1; +explain select a1, max(a2) from t1 group by a1; +explain select a1, min(a2), max(a2) from t1 group by a1; +explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +--replace_column 7 # 9 # +explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +# Select fields in different order +explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +explain select min(a2) from t1 group by a1; +explain select a2, min(c), max(c) from t1 group by a1,a2,b; + +# queries +select a1, min(a2) from t1 group by a1; +select a1, max(a2) from t1 group by a1; +select a1, min(a2), max(a2) from t1 group by a1; +select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b; +select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b; +select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b; +# Select fields in different order +select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1; +select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b; +select min(a2) from t1 group by a1; +select a2, min(c), max(c) from t1 group by a1,a2,b; + +# +# Queries with a where clause +# + +# A) Preds only 
over the group 'A' attributes +# plans +explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 
where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +--replace_column 9 # +explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; + +# queries +select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b; +select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b; + +select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 
'd' group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b; +select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b; +select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b; + +# B) Equalities only over the non-group 'B' attributes +# plans +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 
'b') group by a1; +explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +explain select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost() +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +# queries +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; +select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,a2,b, max(c) 
from t1 where (b = 'b') group by a1,a2; +select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2; +select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2; +select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2; +select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2; + +# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost() +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1; +select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1; +select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1; + + +# IS NULL (makes sense for t2 only) +# 
plans +explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; + +explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1; +explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; + +explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; + +# queries +select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1; +select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1; +select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1; +select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; +select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; + +# C) Range predicates for the MIN/MAX attribute +# plans +--replace_column 9 # +explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; 
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 
'b1') or (c <= 'g1') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; + +# queries +select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b; + +select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) 
from t2 where (c > 'f123') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b; +select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b; + +# analyze the sub-select +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c = t1.c ) +group by a1,a2,b; + +# the sub-select is unrelated to MIN/MAX +explain select a1,a2,b,min(c),max(c) from t1 +where exists ( select * from t2 where t2.c > 'b1' ) +group by a1,a2,b; + + +# A,B,C) Predicates referencing mixed classes of attributes +# plans +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by 
a1,a2,b; +explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# queries +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 
'o122')) group by a1,a2,b; +select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b; + +select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b; +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b; +select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b; +select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + + +# +# GROUP BY queries without MIN/MAX +# + +# plans +explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +--replace_column 9 # +explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +--replace_column 9 # +explain 
select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# queries +select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b; +select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b; +select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; + +# +# DISTINCT queries +# + +# plans +explain select distinct a1,a2,b from t1; +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); +explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e'; + +--replace_column 9 # +explain select distinct a1,a2,b from t2; 
+--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; + +# queries +select distinct a1,a2,b from t1; +select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select distinct b from t1 where (a2 >= 'b') and (b = 'a'); +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b'; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e'; + +select distinct a1,a2,b from t2; +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select distinct b from t2 where (a2 >= 'b') and (b = 'a'); +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b'; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e'; + +# BUG #6303 +select distinct t_00.a1 +from t1 t_00 +where exists ( select * from t2 where a1 = t_00.a1 ); + +# BUG #8532 - SELECT DISTINCT a, a causes server to crash +select distinct a1,a1 from t1; +select distinct a2,a1,a2,a1 from t1; +select distinct t1.a1,t2.a1 from t1,t2; + + +# +# DISTINCT queries with GROUP-BY +# + +# plans +explain select distinct a1,a2,b from t1; +explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 
'i121') group by a1,a2,b; +explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +--replace_column 9 # +explain select distinct a1,a2,b from t2; +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +--replace_column 9 # +explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +--replace_column 9 # +explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +--replace_column 9 # +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +--replace_column 9 # +explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +# queries +select distinct a1,a2,b from t1; +select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b; +select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1; +select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1; + +select distinct a1,a2,b from t2; +select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b; +select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 
'c') group by a1,a2,b; +select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1; +select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1; + + +# +# COUNT (DISTINCT cols) queries +# + +explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); + +select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); +select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); +select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); +select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a'); +select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a'); + +# +# Queries with expressions in the select clause +# + +explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2; + +select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b; +select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b; +select concat(ord(min(b)),ord(max(b))),min(b),max(b) 
from t1 group by a1,a2; + + +# +# Negative examples: queries that should NOT be treated as optimizable by +# QUICK_GROUP_MIN_MAX_SELECT +# + +# select a non-indexed attribute +explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b; + +explain select a1,a2,b,d from t1 group by a1,a2,b; + +# predicate that references an attribute that is after the MIN/MAX argument +# in the index +explain extended select a1,a2,min(b),max(b) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2; + +# predicate that references a non-indexed attribute +explain extended select a1,a2,b,min(c),max(c) from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b; + +explain extended select a1,a2,b,c from t1 +where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c; + +# non-equality predicate for a non-group select attribute +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1; +explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; + +# non-group field with an equality predicate that references a keypart after the +# MIN/MAX argument +explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; + +# disjunction for a non-group select attribute +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1; + +# non-range predicate for the MIN/MAX attribute +explain select 
a1,a2,b,min(c),max(c) from t2 +where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b; + +# not all attributes are indexed by one index +explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c; + +# other aggregate functions than MIN/MAX +explain select a1,a2,count(a2) from t1 group by a1,a2,b; +explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b; +explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b; + +# test multi_range_groupby flag +set optimizer_switch = 'multi_range_groupby=off'; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; +set optimizer_switch = 'default'; +explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1; + + +# +# Bug #16710: select distinct doesn't return all it should +# + +explain select distinct(a1) from t1 where ord(a2) = 98; +select distinct(a1) from t1 where ord(a2) = 98; + +# +# BUG#11044: DISTINCT or GROUP BY queries with equality predicates instead of MIN/MAX. +# + +explain select a1 from t1 where a2 = 'b' group by a1; +select a1 from t1 where a2 = 'b' group by a1; + +explain select distinct a1 from t1 where a2 = 'b'; +select distinct a1 from t1 where a2 = 'b'; + +# +# Bug #12672: primary key implicitly included in every innodb index +# +# Test case moved to group_min_max_innodb + + +# +# Bug #6142: a problem with the empty innodb table +# +# Test case moved to group_min_max_innodb + + +# +# Bug #9798: group by with rollup +# +# Test case moved to group_min_max_innodb + + +# +# Bug #13293 Wrongly used index results in endless loop.
+# +# Test case moved to group_min_max_innodb + + +drop table t1,t2,t3; + +# +# Bug #14920 Ordering aggregated result sets with composite primary keys +# corrupts resultset +# +eval create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=$engine; +insert into t1 (c1,c2) values +(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9); +select distinct c1, c2 from t1 order by c2; +select c1,min(c2) as c2 from t1 group by c1 order by c2; +select c1,c2 from t1 group by c1,c2 order by c2; +drop table t1; + +# +# Bug #16203: Analysis for possible min/max optimization erroneously +# returns impossible range +# + +eval CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=$engine; +INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4); +OPTIMIZE TABLE t1; + +SELECT a FROM t1 WHERE a='AA' GROUP BY a; +SELECT a FROM t1 WHERE a='BB' GROUP BY a; + +EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a; +EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a; + +SELECT DISTINCT a FROM t1 WHERE a='BB'; +SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%'; +SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a; + +DROP TABLE t1; + + +# +# Bug #15102: select distinct returns empty result, select count +# distinct > 0 (correct) +# + +CREATE TABLE t1 ( + a int(11) NOT NULL DEFAULT '0', + b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '', + PRIMARY KEY (a,b) + ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci; + +delimiter |; + +CREATE PROCEDURE a(x INT) +BEGIN + DECLARE rnd INT; + DECLARE cnt INT; + + WHILE x > 0 DO + SET rnd= x % 100; + SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd); + INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR)); + SET x= x - 1; + END WHILE; +END| + +DELIMITER ;| + +CALL a(1000); + +SELECT a FROM t1 WHERE a=0; +SELECT DISTINCT a FROM t1 WHERE a=0; +SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0; + +DROP TABLE t1; +DROP PROCEDURE a; + +# +# Bug #18068: SELECT DISTINCT +# + +eval CREATE TABLE t1 (a 
varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=$engine; + +INSERT INTO t1 (a) VALUES + (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'), + ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'), + ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN'); + +EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a; +SELECT DISTINCT a,a FROM t1 ORDER BY a; + +DROP TABLE t1; + +# +# Bug #21007: NATURAL JOIN (any JOIN (2 x NATURAL JOIN)) crashes the server +# + +eval CREATE TABLE t1 (id1 INT, id2 INT) engine=$engine; +eval CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=$engine; +eval CREATE TABLE t3 (id3 INT, id4 INT) engine=$engine; +eval CREATE TABLE t4 (id4 INT) engine=$engine; +eval CREATE TABLE t5 (id5 INT, id6 INT) engine=$engine; +eval CREATE TABLE t6 (id6 INT) engine=$engine; + +INSERT INTO t1 VALUES(1,1); +INSERT INTO t2 VALUES(1,1,1); +INSERT INTO t3 VALUES(1,1); +INSERT INTO t4 VALUES(1); +INSERT INTO t5 VALUES(1,1); +INSERT INTO t6 VALUES(1); + +# original bug query +SELECT * FROM +t1 + NATURAL JOIN +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); + +# inner join swapped +SELECT * FROM +t1 + NATURAL JOIN +(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2 + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)); + +# one join less, no ON cond +SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2); + +# wrong error message: 'id2' - ambiguous column +SELECT * FROM +(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6) + ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5)) + NATURAL JOIN +t1; +SELECT * FROM +(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6))) + NATURAL JOIN +t1; + +DROP TABLE t1,t2,t3,t4,t5,t6; + +# +# Bug#22342: No results returned for query using max and group by +# +eval CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=$engine; +INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3); +ANALYZE TABLE t1; + +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 
GROUP BY a; +SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a; +eval CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=$engine; +INSERT INTO t2 SELECT a,b,b FROM t1; +ANALYZE TABLE t2; +explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; +SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; + +DROP TABLE t1,t2; + +# +# Bug#24156: Loose index scan not used with CREATE TABLE ...SELECT and similar statements +# + +eval CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5), + (2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6); +ANALYZE TABLE t1; +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +FLUSH STATUS; +SELECT max(b), a FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +EXPLAIN SELECT max(b), a FROM t1 GROUP BY a; +FLUSH STATUS; +eval CREATE TABLE t2 engine=$engine SELECT max(b), a FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +(SELECT max(b), a FROM t1 GROUP BY a) UNION + (SELECT max(b), a FROM t1 GROUP BY a); +SHOW STATUS LIKE 'handler_read__e%'; +EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION + (SELECT max(b), a FROM t1 GROUP BY a); + +EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1 AS t1_outer; +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS + (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE + (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12; +EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE + a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING + a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); +EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2 + ON t1_outer1.a = (SELECT 
max(b) FROM t1 GROUP BY a HAVING a < 2) + AND t1_outer1.b = t1_outer2.b; +EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2; + +CREATE TABLE t3 LIKE t1; +FLUSH STATUS; +INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a; +SHOW STATUS LIKE 'handler_read__e%'; +DELETE FROM t3; +FLUSH STATUS; +INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) + FROM t1 LIMIT 1; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000; +SHOW STATUS LIKE 'handler_read__e%'; +FLUSH STATUS; +--error ER_SUBQUERY_NO_1_ROW +DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x + FROM t1) > 10000; +SHOW STATUS LIKE 'handler_read__e%'; + +DROP TABLE t1,t2,t3; + +# +# Bug#25602: queries with DISTINCT and SQL_BIG_RESULT hint +# for which loose scan optimization is applied +# + +eval CREATE TABLE t1 (a int, INDEX idx(a)) engine=$engine; +INSERT INTO t1 VALUES + (4), (2), (1), (2), (4), (2), (1), (4), + (4), (2), (1), (2), (2), (4), (1), (4); +ANALYZE TABLE t1; + +EXPLAIN SELECT DISTINCT(a) FROM t1; +SELECT DISTINCT(a) FROM t1; +EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; +SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; + +DROP TABLE t1; + +# +# Bug #32268: Indexed queries give bogus MIN and MAX results +# + +eval CREATE TABLE t1 (a INT, b INT) engine=$engine; +INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3); +INSERT INTO t1 SELECT a + 1, b FROM t1; +INSERT INTO t1 SELECT a + 2, b FROM t1; +ANALYZE TABLE t1; + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; + +CREATE INDEX break_it ON t1 (a, b); + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a; + +EXPLAIN +SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b) FROM t1 
GROUP BY a ORDER BY a DESC; + +EXPLAIN +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; +SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC; + +DROP TABLE t1; + +# +# Bug#38195: Incorrect handling of aggregate functions when loose index scan is +# used causes server crash. +# +create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM; +insert into t1 (a,b) values +(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6), + (0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13), +(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6), + (1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13), +(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6), + (2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13), +(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6), + (3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13); +insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a; +ANALYZE TABLE t1; +select * from t1; +explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a; +drop table t1; + + +# +# Bug #41610: key_infix_len can be overwritten causing some group by queries +# to return no rows +# + +eval CREATE TABLE t1 (a int, b int, c int, d int, + KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=$engine; + +INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT a,b,c+1,d FROM t1; +ANALYZE TABLE t1; + +#Should be non-empty +EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4; +SELECT DISTINCT c FROM t1 WHERE d=4; + +DROP TABLE t1; + +--echo # +--echo # Bug #45386: Wrong query result with MIN function in field list, +--echo # WHERE and GROUP BY clause +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; + +--echo # test MIN +--echo #should use range with index for group by +EXPLAIN +SELECT a, MIN(b) 
FROM t WHERE b <> 0 GROUP BY a; +--echo #should return 1 row +SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a; + +--echo # test MAX +--echo #should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; +--echo #should return 1 row +SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a; + +--echo # test 3 ranges and use the middle one +INSERT INTO t SELECT a, 2 FROM t; + +--echo #should use range with index for group by +EXPLAIN +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; +--echo #should return 1 row +SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a; + +DROP TABLE t; + +--echo # +--echo # Bug #48472: Loose index scan inappropriately chosen for some WHERE +--echo # conditions +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; +ANALYZE TABLE t; + +SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a; + +DROP TABLE t; + +--echo End of 5.0 tests + +--echo # +--echo # Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in +--echo # server crash +--echo # + +eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine; +INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1); +INSERT INTO t SELECT * FROM t; + +SELECT a, MAX(b) FROM t WHERE b GROUP BY a; + +DROP TABLE t; + +# +# BUG#49902 - SELECT returns incorrect results +# +eval CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=$engine; +INSERT INTO t1 VALUES(1,1),(2,1); +ANALYZE TABLE t1; +SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b; +SELECT a FROM t1 WHERE b=1; +DROP TABLE t1; + +--echo # +--echo # Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column +--echo # for NULL +--echo # + +--echo ## Test for NULLs allowed +eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +--source include/min_null_cond.inc +INSERT INTO t1 VALUES (NULL), (NULL); +ANALYZE 
TABLE t1; +--source include/min_null_cond.inc +DROP TABLE t1; + +--echo ## Test for NOT NULLs +eval CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); +ANALYZE TABLE t1; +--echo # +--echo # NULL-safe operator test disabled for non-NULL indexed columns. +--echo # +--echo # See bugs +--echo # +--echo # - Bug#52173: Reading NULL value from non-NULL index gives +--echo # wrong result in embedded server +--echo # +--echo # - Bug#52174: Sometimes wrong plan when reading a MAX value from +--echo # non-NULL index +--echo # +--let $skip_null_safe_test= 1 +--source include/min_null_cond.inc +DROP TABLE t1; + +--echo # +--echo # Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List&, Item*) at +--echo # opt_sum.cc:305 +--echo # +eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine; +INSERT INTO t1 VALUES (1), (2), (3); + +SELECT MIN( a ) AS min_a +FROM t1 +WHERE a > 1 AND a IS NULL +ORDER BY min_a; + +DROP TABLE t1; + + +--echo End of 5.1 tests + + +--echo # +--echo # WL#3220 (Loose index scan for COUNT DISTINCT) +--echo # + +eval CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=$engine; +INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1); +INSERT INTO t1 SELECT a, b + 4, 1 FROM t1; +INSERT INTO t1 SELECT a + 1, b, 1 FROM t1; +ANALYZE TABLE t1; +eval CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=$engine; +INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1), + (1,4,1,1,1,1); +INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2; +INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2; +ANALYZE TABLE t2; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1; +SELECT COUNT(DISTINCT a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1; +SELECT COUNT(DISTINCT a,b) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1; +SELECT COUNT(DISTINCT b,a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1; +SELECT COUNT(DISTINCT b) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a; 
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a; +SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; +SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b; + +EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1; +SELECT DISTINCT COUNT(DISTINCT a) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1; +SELECT COUNT(DISTINCT a, b + 0) FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; + +EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; +SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; + +EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; +SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; + +EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; +SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1; + +EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; +SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a; + +EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1; +SELECT COUNT(DISTINCT a), 12 FROM t1; + +EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2; +SELECT COUNT(DISTINCT a, b, c) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; +SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2; + +EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; +SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2; + +EXPLAIN SELECT a, c, 
COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; +SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c; + +EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2 + WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; +SELECT COUNT(DISTINCT c, a, b) FROM t2 + WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c; + +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 + GROUP BY b; +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5 + GROUP BY b; + +EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; +SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a; + +EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; +SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42; + +EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 + WHERE b = 13 AND c = 42 GROUP BY a; +SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 + WHERE b = 13 AND c = 42 GROUP BY a; + +--echo # This query could have been resolved using loose index scan since +--echo # the second part of count(..) 
is defined by a constant predicate +EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; +SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; + +EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; +SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a; + +EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; +SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c; + +EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; +SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a; + +DROP TABLE t1,t2; + +--echo # end of WL#3220 tests + +--echo # +--echo # Bug#50539: Wrong result when loose index scan is used for an aggregate +--echo # function with distinct +--echo # +eval CREATE TABLE t1 ( + f1 int(11) NOT NULL DEFAULT '0', + f2 char(1) NOT NULL DEFAULT '', + PRIMARY KEY (f1,f2) +) engine=$engine; +insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), +(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D'); +ANALYZE TABLE t1; + +SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; + +drop table t1; +--echo # End of test#50539. + +--echo # +--echo # Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND +--echo # "HAVING SUM(DISTINCT)": WRONG RESULTS. 
+--echo # + +eval CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=$engine; +INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5); +ANALYZE TABLE t; +let $DEFAULT_TRACE_MEM_SIZE=1048576; # 1MB +eval set optimizer_trace_max_mem_size=$DEFAULT_TRACE_MEM_SIZE; +set @@session.optimizer_trace='enabled=on'; +set end_markers_in_json=on; + +ANALYZE TABLE t; + +SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a); +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a; +EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a; +SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK + FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +SET optimizer_trace_max_mem_size=DEFAULT; +SET optimizer_trace=DEFAULT; +SET end_markers_in_json=DEFAULT; + +DROP TABLE t; + +--echo # +--echo # Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD +--echo # + +eval CREATE TABLE t1 ( +id INT AUTO_INCREMENT PRIMARY KEY, +c1 INT, +c2 INT, +KEY(c1,c2)) engine=$engine; + +INSERT INTO t1(c1,c2) VALUES +(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3), +(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), 
(4,13), +(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5); +ANALYZE TABLE t1; + +EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +FLUSH STATUS; +SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1; +SHOW SESSION STATUS LIKE 'Handler_read%'; + +DROP TABLE t1; + +--echo # End of test for Bug#18109609 diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc new file mode 100644 index 00000000000..d9b4b46b25a --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc @@ -0,0 +1,23 @@ +# Skips the test when the resolved MYSQLTEST_VARDIR path contains /dev/shm or /run/shm +# +# Required input: +# none (reads the MYSQLTEST_VARDIR and MYSQL_TMP_DIR environment variables) + +--perl +use Cwd 'abs_path'; + +open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die; +my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'}); +my $in_shm= (index($real_path, "/dev/shm") != -1) || + (index($real_path, "/run/shm") != -1); +print FILE "let \$DATA_IN_SHM= $in_shm;\n"; +close FILE; +EOF + +--source $MYSQL_TMP_DIR/data_in_shm.inc +--remove_file $MYSQL_TMP_DIR/data_in_shm.inc + +if ($DATA_IN_SHM) +{ + --skip DATADIR is in /{dev|run}/shm, possibly due to --mem +} diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc index c23717c4fda..34947cb0ecb 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc @@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval
SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc index da80f796750..8140b81a95e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc @@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; INSERT INTO t0 VALUES(200001,1), (-1,1); diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc index b77a54e4360..e28f1c90b3a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc @@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID /* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc index 9494146ba5c..13ceca07913 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc @@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`; send SELECT * FROM t0 WHERE value > 0 FOR UPDATE; connection con2; -let $wait_condition = SELECT 1 FROM information_schema.processlist - WHERE id = $ID AND state = "Sending data"; +let $wait_condition = + SELECT 1 FROM information_schema.processlist + WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data"; --source include/wait_condition.inc eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; BEGIN; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc deleted file mode 100644 index 71e713226d7..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc +++ /dev/null @@ -1,53 +0,0 @@ -# Usage: -# -# let $order = ASC; # or DESC -# let $comment = "rev:cf2"; # or "" -# --source suite/rocksdb/include/rocksdb_concurrent_delete.inc - -let $first_row = -1; # Error this should never happen -if ($order == 'ASC') -{ - let $first_row = 1; -} -if ($order == 'DESC') -{ - let $first_row = 3; -} - -connect (con, localhost, root,,); -connection default; - ---disable_warnings -SET debug_sync='RESET'; -DROP TABLE IF EXISTS t1; ---enable_warnings - -eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3); - -# This will cause the SELECT to block after finding the first row, but -# before locking and reading it. 
-connection con; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; - -# While that connection is waiting, delete the first row (the one con -# is about to lock and read -connection default; -SET debug_sync='now WAIT_FOR parked'; -eval DELETE FROM t1 WHERE pk = $first_row; - -# Signal the waiting select to continue -SET debug_sync='now SIGNAL go'; - -# Now get the results from the select. The first entry (1,1) (or (3,3) when -# using reverse ordering) should be missing. Prior to the fix the SELECT -# would have returned: "1815: Internal error: NotFound:" -connection con; -reap; - -# Cleanup -connection default; -disconnect con; -set debug_sync='RESET'; -drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc new file mode 100644 index 00000000000..da16e1c9c3b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc @@ -0,0 +1,23 @@ +# Common test pattern for options that control direct i/o +# +# Required input: +# $io_option - name and assignment to enable on server command line + +--source include/have_direct_io.inc + +--echo Checking direct reads +--let $_mysqld_option=$io_option +--source include/restart_mysqld_with_option.inc + +CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb; +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1, 1,'a'); +INSERT INTO t1 (a,b) VALUES (2,'b'); +set global rocksdb_force_flush_memtable_now=1; +--sorted_result +SELECT a,b FROM t1; +DROP TABLE t1; + +# cleanup: reset the restart option set above, then restart the server +--let $_mysqld_option= +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result index 0617232f1e3..e7883f7e03e 100644 ---
a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result @@ -17,7 +17,7 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; set @tmp= @@rocksdb_max_row_locks; set session rocksdb_max_row_locks=1000; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; -ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to max_num_locks limit' from ROCKSDB +ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to rocksdb_max_row_locks limit' from ROCKSDB set session rocksdb_bulk_load=1; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; set session rocksdb_bulk_load=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result index f59b841a595..cc47ceff7ca 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result @@ -176,3 +176,24 @@ INSERT INTO t1 (a) VALUES (1); UPDATE t1 SET pk = 3; ALTER TABLE t1 AUTO_INCREMENT 2; DROP TABLE t1; +#---------------------------------- +# Issue #902 Debug assert in autoincrement with small field type +#---------------------------------- +SET auto_increment_increment=100, auto_increment_offset=10; +CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615; +INSERT INTO t1 VALUES (NULL); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +i +ALTER TABLE t1 AUTO_INCREMENT=1; +INSERT INTO t1 VALUES (NULL); +SELECT * FROM t1; +i +10 +ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615; +INSERT INTO t1 VALUES (NULL); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +i +10 +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result similarity index 
91% rename from storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result rename to storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result index a3fc25cc81b..973d1876fa0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result @@ -3,6 +3,7 @@ Warnings: Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. [connection master] +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key; set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api; DROP TABLE IF EXISTS t1,t2; @@ -17,6 +18,7 @@ SELECT count(*) FROM t1; count(*) 9000 include/sync_slave_sql_with_master.inc +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT count(*) FROM t1; count(*) 9000 @@ -71,14 +73,14 @@ count(*) call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*"); call mtr.add_suppression("Slave: Can't find record in 't1'.*"); include/wait_for_slave_sql_error.inc [errno=1032] -set @save_rocksdb_read_free_rpl_tables=@@global.rocksdb_read_free_rpl_tables; -set global rocksdb_read_free_rpl_tables="t.*"; +set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl; +set global rocksdb_read_free_rpl=PK_SK; START SLAVE; include/sync_slave_sql_with_master.inc SELECT count(*) FROM t1; count(*) 7000 -set global rocksdb_read_free_rpl_tables=@save_rocksdb_read_free_rpl_tables; +set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl; SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key; SET 
session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api; DROP TABLE t1, t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result new file mode 100644 index 00000000000..683b672e360 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result @@ -0,0 +1,87 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key; +set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api; +DROP TABLE IF EXISTS t1,t2; +create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb; +create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb; +SET session rocksdb_blind_delete_primary_key=1; +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +1000 +SELECT count(*) FROM t1; +count(*) +9000 +include/sync_slave_sql_with_master.inc +SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SELECT count(*) FROM t1; +count(*) +9000 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +0 +SELECT count(*) 
FROM t2; +count(*) +9000 +SET session rocksdb_master_skip_tx_api=1; +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +1000 +SELECT count(*) FROM t1; +count(*) +8000 +SELECT count(*) FROM t2; +count(*) +8000 +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +8000 +SELECT count(*) FROM t2; +count(*) +8000 +select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000; +DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000; +select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind'; +variable_value-@c +0 +SELECT count(*) FROM t1; +count(*) +7000 +SELECT count(*) FROM t2; +count(*) +7000 +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +7000 +SELECT count(*) FROM t2; +count(*) +7000 +DELETE FROM t1 WHERE id = 10; +SELECT count(*) FROM t1; +count(*) +7000 +call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*"); +call mtr.add_suppression("Slave: Can't find record in 't1'.*"); +include/wait_for_slave_sql_error.inc [errno=1032] +set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl; +set global rocksdb_read_free_rpl=PK_SK; +START SLAVE; +include/sync_slave_sql_with_master.inc +SELECT count(*) FROM t1; +count(*) +7000 +set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl; +SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key; +SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api; +DROP TABLE t1, t2; +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result index 
6ad9867049d..f3c4fdf1040 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result @@ -20,6 +20,24 @@ id1 id2 link_type visibility data time version select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; case when variable_value-@c > 0 then 'true' else 'false' end true +# MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS +# to get the query plan we want. +set @tmp_use_stat_tables= @@use_stat_tables; +set use_stat_tables='preferably'; +analyze table linktable persistent for all; +Table Op Msg_type Msg_text +test.linktable analyze status Engine-independent statistics collected +test.linktable analyze status OK +flush tables; +explain select * from linktable; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE linktable ALL NULL NULL NULL NULL 10000 +# This must use range(id1_type2), key_len=24 +explain +select id1, id2, link_type, visibility, data, time, version from linktable +FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE linktable range id1_type2 id1_type2 24 NULL 1000 Using where; Using index select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked'; select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc; id1 id2 link_type visibility data time version diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result index 4f6702b85a7..daf4f5e30ba 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result @@ -59,4 +59,27 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345); # This must not fail an assert: select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; pk kp1 kp2 col1 -drop table t1,t2,t3,t4; +# +# Issue #881: Issue #809 still occurs for reverse scans on forward cfs +# +create table t5 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'bf5_1' +) engine=ROCKSDB; +insert into t5 select * from t1; +set global rocksdb_force_flush_memtable_now=1; +# An index scan starting from the end of the table: +explain +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t5 index NULL PRIMARY 122 NULL 1 +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +drop table t1,t2,t3,t4,t5; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result index 21417caf760..d37c8097622 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened 
snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE INTO TABLE t1; +pk a b LOAD DATA INFILE INTO TABLE t2; +pk a b LOAD DATA INFILE INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result index 484c2a89c3a..270d372f343 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE INTO TABLE t1; +pk a b LOAD DATA INFILE INTO TABLE t2; +pk a b LOAD DATA INFILE INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version 
Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result index 35a2845cb42..2fe9c551fb0 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE INTO TABLE t1; +pk a b LOAD DATA INFILE INTO TABLE t2; +pk a b LOAD DATA INFILE INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result index 12013539017..e534be48921 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result @@ -22,19 +22,20 @@ KEY(a) ) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 -connection default; +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= +'cf1={write_buffer_size=8m;target_file_size_base=1m};'; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; LOAD DATA INFILE INTO TABLE t1; +pk a b LOAD DATA INFILE INTO TABLE t2; +pk a b LOAD DATA INFILE INTO TABLE t3; +pk a b set rocksdb_bulk_load=0; SHOW TABLE STATUS WHERE name LIKE 't%'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment Max_index_length Temporary diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 444f997bf48..e58950072bc 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 
start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 connection default; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result index dea69b3b089..ca50d6f5a2b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result @@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 0 start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -STAT_TYPE VALUE -DB_NUM_SNAPSHOTS 1 +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +Has opened snapshots +1 connection default; set rocksdb_bulk_load=1; set rocksdb_bulk_load_size=100000; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result new file mode 100644 index 00000000000..1f687dfec53 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result @@ -0,0 +1,693 @@ +CREATE TABLE `link_table` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , 
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE TABLE `link_table2` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) +COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 
'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 
1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); +insert into link_table2 select * from link_table; +CREATE TABLE `id_table` ( +`id` bigint(20) NOT NULL DEFAULT '0', +`type` int(11) NOT NULL DEFAULT '0', +`row_created_time` int(11) NOT NULL DEFAULT '0', +`hash_key` varchar(255) NOT NULL DEFAULT '', +`is_deleted` tinyint(4) DEFAULT '0', +PRIMARY KEY (`id`), +KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); +CREATE TABLE `node_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +`update_time` int(10) unsigned NOT NULL DEFAULT '0', +`data` mediumtext COLLATE latin1_bin NOT NULL, +PRIMARY KEY (`type`,`id`) COMMENT 
'cf_node_type_id', +KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into node_table values (1, 1, 1, 10, 'data'); +insert into node_table values (2, 1, 1, 10, 'data'); +insert into node_table values (3, 1, 1, 10, 'data'); +insert into node_table values (4, 1, 1, 10, 'data'); +insert into node_table values (5, 1, 1, 10, 'data'); +insert into node_table values (6, 1, 1, 10, 'data'); +insert into node_table values (7, 1, 1, 10, 'data'); +insert into node_table values (8, 1, 1, 10, 'data'); +insert into node_table values (9, 1, 1, 10, 'data'); +insert into node_table values (10, 1, 1, 10, 'data'); +CREATE TABLE `count_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`count` int(10) unsigned NOT NULL DEFAULT '0', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into count_table values (2, 1, 1, 1, 10, 20); +insert into count_table values (3, 1, 1, 1, 10, 20); +insert into count_table values (4, 1, 1, 1, 10, 20); +insert into count_table values (5, 1, 1, 1, 10, 20); +insert into count_table values (6, 1, 1, 1, 10, 20); +insert into count_table values (7, 1, 1, 1, 10, 20); +insert into count_table values (8, 1, 1, 1, 10, 20); +insert into count_table values (9, 1, 1, 1, 10, 20); +insert into count_table values (10, 1, 1, 1, 10, 20); +CREATE TABLE `link_table5` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT 
'0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table3` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 
'data32', 1, 1); +CREATE TABLE `link_table6` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, +`data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table6 values (1, 1, 2, 2, 1, 1, +'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, +'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, +'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, 
+'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, +'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, +'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, +'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +CREATE TABLE `link_table4` ( +`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', +`raw_key` text COLLATE latin1_bin, +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL 
DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version +from link_table WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type 
data version +1 2 1 2 a10 125 +SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +# Point query +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1) and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 
+1 3 1 2 a11 125 +1 4 1 2 a11 125 +2 2 1 2 a10 125 +2 3 1 2 a11 125 +2 4 1 2 a11 125 +# Prefix range query +# Prefix range query with SK +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME ASC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +# Prefix range query with SK with limits +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, 
version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME 
DESC LIMIT 1,10; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,5; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,1; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,10; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,5; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,1; +id1 id2 link_type visibility data 
time version +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,10; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,5; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,1; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,0; +id1 id2 link_type visibility data time version +# Prefix range query with PK +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type +1 10 3 +1 9 3 +1 8 3 +1 7 3 +1 6 3 +1 5 3 +1 4 3 +1 3 3 +1 2 3 +1 1 3 +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type +1 1 3 +1 2 3 +1 3 3 +1 4 3 +1 5 3 +1 6 3 +1 7 3 +1 8 3 +1 9 3 +1 10 3 +# Prefix range query with PK + value +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY 
id2 DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 5 3 3 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 2 3 3 a10 10 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type visibility data time version +1 1 3 4 a10 10 125 +1 2 3 3 a10 10 125 +1 3 3 4 a11 11 125 +1 4 3 4 a11 11 125 +1 5 3 3 a12 12 125 +1 6 3 4 a12 12 125 +1 7 3 4 a12 12 125 +1 8 3 4 a13 13 125 +1 9 3 4 a14 14 125 +1 10 3 4 a15 15 125 +# Transaction +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +COMMIT; +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 cde 125 +ROLLBACK; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +# Data types +SELECT /*+ bypass */ id1 FROM link_table where link_type="3"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1"; +id1 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL; +id1 +DROP TABLE count_table; +DROP TABLE link_table; +DROP TABLE link_table3; +DROP TABLE link_table2; +DROP TABLE id_table; +DROP TABLE node_table; +DROP TABLE link_table5; +DROP TABLE link_table6; +DROP TABLE link_table4; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result new file mode 100644 index 00000000000..1f687dfec53 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result @@ -0,0 +1,693 @@ +CREATE TABLE `link_table` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE TABLE `link_table2` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id1_type` int(10) unsigned NOT NULL DEFAULT '0' , +`id2` bigint(20) unsigned NOT NULL DEFAULT '0' , +`id2_type` int(10) unsigned NOT NULL DEFAULT '0' , 
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , +`visibility` tinyint(3) NOT NULL DEFAULT '0' , +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , +`time` int(10) unsigned NOT NULL DEFAULT '0' , +`version` bigint(20) unsigned NOT NULL DEFAULT '0' , +PRIMARY KEY (`link_type` , `id1` , `id2`) +COMMENT 'cf_link' , +KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , +`version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 
2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); +insert into link_table2 select * from link_table; 
+CREATE TABLE `id_table` ( +`id` bigint(20) NOT NULL DEFAULT '0', +`type` int(11) NOT NULL DEFAULT '0', +`row_created_time` int(11) NOT NULL DEFAULT '0', +`hash_key` varchar(255) NOT NULL DEFAULT '', +`is_deleted` tinyint(4) DEFAULT '0', +PRIMARY KEY (`id`), +KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); +CREATE TABLE `node_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +`update_time` int(10) unsigned NOT NULL DEFAULT '0', +`data` mediumtext COLLATE latin1_bin NOT NULL, +PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', +KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into node_table values (1, 1, 1, 10, 'data'); +insert into node_table values (2, 1, 1, 10, 'data'); +insert into node_table values (3, 1, 1, 10, 'data'); +insert into node_table values (4, 1, 1, 10, 'data'); +insert into node_table values (5, 1, 1, 10, 'data'); +insert into node_table values (6, 1, 1, 10, 'data'); +insert into node_table values (7, 1, 1, 10, 'data'); +insert into node_table values (8, 1, 1, 10, 'data'); +insert into node_table values (9, 1, 1, 10, 'data'); +insert into node_table values (10, 1, 1, 10, 'data'); +CREATE TABLE `count_table` ( +`id` bigint(20) unsigned NOT NULL DEFAULT '0', +`type` int(10) unsigned NOT 
NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`count` int(10) unsigned NOT NULL DEFAULT '0', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into count_table values (2, 1, 1, 1, 10, 20); +insert into count_table values (3, 1, 1, 1, 10, 20); +insert into count_table values (4, 1, 1, 1, 10, 20); +insert into count_table values (5, 1, 1, 1, 10, 20); +insert into count_table values (6, 1, 1, 1, 10, 20); +insert into count_table values (7, 1, 1, 1, 10, 20); +insert into count_table values (8, 1, 1, 1, 10, 20); +insert into count_table values (9, 1, 1, 1, 10, 20); +insert into count_table values (10, 1, 1, 1, 10, 20); +CREATE TABLE `link_table5` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); 
+CREATE TABLE `link_table3` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); +CREATE TABLE `link_table6` ( +`id1` bigint(20) unsigned NOT NULL DEFAULT '0', +`id1_type` int(10) unsigned NOT NULL DEFAULT '0', +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(4) NOT NULL DEFAULT '0', +`data` text COLLATE latin1_bin NOT NULL, +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, +`data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into link_table6 values (1, 
1, 2, 2, 1, 1, +'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, +'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, +'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, +'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, +'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, 
+'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, +'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +CREATE TABLE `link_table4` ( +`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', +`raw_key` text COLLATE latin1_bin, +`id2` bigint(20) unsigned NOT NULL DEFAULT '0', +`id2_type` int(10) unsigned NOT NULL DEFAULT '0', +`link_type` bigint(20) unsigned NOT NULL DEFAULT '0', +`visibility` tinyint(3) NOT NULL DEFAULT '0', +`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', +`time` int(10) unsigned NOT NULL DEFAULT '0', +`version` bigint(20) unsigned NOT NULL DEFAULT '0', +PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', +KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) +COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 
2, 1, 1, 'data32', 1, 1); +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version +from link_table WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 
and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +# Point query +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1) and id2 IN (2) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +1 3 1 2 a11 125 +1 4 1 2 a11 125 +2 2 1 2 a10 125 +2 3 1 2 a11 125 +2 4 1 2 a11 125 +# Prefix range query +# Prefix range query with SK +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10 +ORDER BY TIME ASC LIMIT 10; +id1 id2 link_type visibility data time version +1 2 3 3 a10 10 125 +# Prefix range query with SK with limits +SELECT /*+ bypass */ id1, id2, link_type, 
visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,10; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 
4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,5; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 0,1; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,10; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,5; +id1 id2 link_type visibility data time version +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,1; +id1 
id2 link_type visibility data time version +1 9 3 4 a14 14 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 1,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,10; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,5; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,1; +id1 id2 link_type visibility data time version +1 4 3 4 a11 11 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 5,0; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,10; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,5; +id1 id2 
link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,1; +id1 id2 link_type visibility data time version +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (id1_type) +WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10 +ORDER BY TIME DESC LIMIT 10,0; +id1 id2 link_type visibility data time version +# Prefix range query with PK +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type +1 10 3 +1 9 3 +1 8 3 +1 7 3 +1 6 3 +1 5 3 +1 4 3 +1 3 3 +1 2 3 +1 1 3 +SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type +1 1 3 +1 2 3 +1 3 3 +1 4 3 +1 5 3 +1 6 3 +1 7 3 +1 8 3 +1 9 3 +1 10 3 +# Prefix range query with PK + value +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 DESC; +id1 id2 link_type visibility data time version +1 10 3 4 a15 15 125 +1 9 3 4 a14 14 125 +1 8 3 4 a13 13 125 +1 7 3 4 a12 12 125 +1 6 3 4 a12 12 125 +1 5 3 3 a12 12 125 +1 4 3 4 a11 11 125 +1 3 3 4 a11 11 125 +1 2 3 3 a10 10 125 +1 1 3 4 a10 10 125 +SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version +FROM link_table FORCE INDEX (PRIMARY) +WHERE link_type=3 and id1=1 ORDER BY id2 ASC; +id1 id2 link_type visibility data time version +1 1 3 4 a10 10 125 +1 2 3 3 a10 10 125 +1 3 3 4 a11 11 125 +1 4 3 4 a11 11 125 +1 5 3 3 a12 12 125 +1 6 3 4 a12 12 125 +1 7 3 4 a12 12 125 +1 8 3 4 a13 13 125 +1 9 3 4 a14 14 125 +1 10 3 4 a15 15 125 +# Transaction +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 
and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 a10 125 +UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +COMMIT; +BEGIN; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 cde 125 +ROLLBACK; +SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table +WHERE id1=1 and id2=2 and link_type=3; +id1 id2 id1_type id2_type data version +1 2 1 2 bcd 125 +# Data types +SELECT /*+ bypass */ id1 FROM link_table where link_type="3"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1"; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01'; +id1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL; +id1 +DROP TABLE count_table; +DROP TABLE link_table; +DROP TABLE link_table3; +DROP TABLE link_table2; +DROP TABLE id_table; +DROP TABLE node_table; +DROP TABLE link_table5; +DROP TABLE link_table6; +DROP TABLE link_table4; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result 
b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result new file mode 100644 index 00000000000..12c5bc4f85c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result @@ -0,0 +1,66 @@ +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = -10; +connect conn1, localhost, root,,; +connection default; +CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB; +CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB; +CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1'; +INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +INSERT INTO t2 SELECT * FROM t1; +INSERT INTO t3 SELECT * FROM t1; +connection conn1; +set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go'; +SELECT value FROM t1 WHERE value = 3; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go'; +SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5); +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go'; +SELECT value FROM t1 WHERE value > 3; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go'; +SELECT id FROM t2; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query 
execution was interrupted +set debug_sync='RESET'; +connection conn1; +set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go'; +SELECT kp1 FROM t3 ORDER BY kp1; +connection default; +set debug_sync='now WAIT_FOR parked'; +KILL QUERY $conn1_id; +set debug_sync='now SIGNAL go'; +connection conn1; +ERROR 70100: Query execution was interrupted +connection default; +disconnect conn1; +set debug_sync='RESET'; +set global rocksdb_debug_ttl_read_filter_ts = DEFAULT; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result index e5aeb57ebdf..1c45cfd09fe 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result @@ -8,7 +8,7 @@ ERROR HY000: Table without primary key cannot be created outside mysql schema. CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB; ERROR HY000: Table without primary key cannot be created outside mysql schema. CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB; -ERROR HY000: Table without primary key cannot be created outside mysql schema. +ERROR 42000: A table must have at least 1 column CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB; CREATE TABLE mysql_primkey (a INT PRIMARY KEY, b INT, c INT, d INT, INDEX (c)) ENGINE=ROCKSDB; ALTER TABLE mysql_primkey DROP b, DROP a, ADD (f INT PRIMARY KEY); @@ -29,10 +29,24 @@ DROP INDEX `PRIMARY` ON mysql_primkey4; ERROR HY000: Table without primary key cannot be created outside mysql schema. ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a); ALTER TABLE mysql.mysql_table DROP PRIMARY KEY; +SET default_storage_engine=ROCKSDB; +CREATE TABLE mysql_noeng(a INT, b INT); +ERROR HY000: Table without primary key cannot be created outside mysql schema. 
+SET sql_mode=""; +CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE; +ERROR HY000: Table without primary key cannot be created outside mysql schema. +CREATE TABLE mysql_primkey5 LIKE mysql_primkey; +SET @@global.block_create_no_primary_key = false; +CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB; +SET @@global.block_create_no_primary_key = true; +CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey; +ERROR HY000: Table without primary key cannot be created outside mysql schema. DROP TABLE mysql_primkey; DROP TABLE mysql_primkey2; DROP TABLE mysql_primkey3; DROP TABLE mysql_primkey4; +DROP TABLE mysql_primkey5; +DROP TABLE mysql_no_primkey; USE mysql; DROP TABLE mysql_table; DROP TABLE mysql_table_2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result index 50733f81598..1e2636c873a 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result @@ -36,8 +36,8 @@ connection: default (for show processlist) # both con1 and default exist show processlist; Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id - root test