From 3b6424407025aad39b00e60bd0d711b67272513f Mon Sep 17 00:00:00 2001 From: Hugo Wen Date: Fri, 24 Mar 2023 16:06:11 +0000 Subject: [PATCH 1/4] Handle meaningless addr2line results and increase timeout MariaDB server prints the stack information if a crash happens. It traverses the stack frames in function `print_with_addr_resolve`. For *EACH* frame, it tries to parse the file name and line number of the frame using `addr2line`, or prints `backtrace_symbols_fd` if `addr2line` fails. 1. Logic in `addr_resolve` function uses addr2line to get the file name and line numbers. It has a timeout of 500ms to wait for the response from addr2line. However, that's not enough on small instances especially if the debug information is in a separate file or compressed. Increase the timeout to 5 seconds to support some edge cases, as experiments showed addr2line may take 2-3 seconds on some frames. 2. While parsing a frame inside of a shared library using `addr2line`, the file name and line numbers could be `??`, empty or `0` if the debug info is not loaded. It's easy to reproduce when glibc-debuginfo is not installed. Instead of printing a meaningless frame like: :0(__GI___poll)[0x1505e9197639] ... ??:0(__libc_start_main)[0x7ffff6c8913a] We want to print the frame information using `backtrace_symbols_fd`, with the shared library name and a hexadecimal offset. Stacktrace example on a real instance with this commit: /lib64/libc.so.6(__poll+0x49)[0x145cbf71a639] ... /lib64/libc.so.6(__libc_start_main+0xea)[0x7f4d0034d13a] `addr_resolve` has considered the case of meaningless combination of file name and line number returned by `addr2line`. e.g. `??:?` However, conditions like `:0` and `??:0` are not handled. So now the function will rollback to `backtrace_symbols_fd` in above cases. All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. --- mysys/my_addr_resolve.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mysys/my_addr_resolve.c b/mysys/my_addr_resolve.c index ac1bdae187c..52fcaaf67bb 100644 --- a/mysys/my_addr_resolve.c +++ b/mysys/my_addr_resolve.c @@ -252,10 +252,10 @@ int my_addr_resolve(void *ptr, my_addr_loc *loc) return 3; - /* 500 ms should be plenty of time for addr2line to issue a response. */ + /* 5000 ms should be plenty of time for addr2line to issue a response. */ /* Read in a loop till all the output from addr2line is complete. */ while (parsed == total_bytes_read && - (ret= poll(&poll_fds, 1, 500))) + (ret= poll(&poll_fds, 1, 5000))) { /* error during poll */ if (ret < 0) @@ -299,7 +299,8 @@ int my_addr_resolve(void *ptr, my_addr_loc *loc) loc->line= atoi(output + line_number_start); /* Addr2line was unable to extract any meaningful information. */ - if (strcmp(loc->file, "??") == 0) + if ((strcmp(loc->file, "??") == 0 || strcmp(loc->file, "") == 0) && + (loc->func[0] == '?' || loc->line == 0)) return 6; loc->file= strip_path(loc->file); From 1767390be4239c031ecbe999fa09bbc6c99c1c65 Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Sat, 1 Apr 2023 14:42:05 +0200 Subject: [PATCH 2/4] Fix passing correct length of string in command print. --- sql/sql_select.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index c4fc2d19156..0651c1d58bd 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -27990,19 +27990,19 @@ enum explainable_cmd_type }; static -const char * const explainable_cmd_name []= +const LEX_CSTRING explainable_cmd_name []= { - "select ", - "insert ", - "replace ", - "update ", - "delete ", + {STRING_WITH_LEN("select ")}, + {STRING_WITH_LEN("insert ")}, + {STRING_WITH_LEN("replace ")}, + {STRING_WITH_LEN("update ")}, + {STRING_WITH_LEN("delete ")}, }; static -char const *get_explainable_cmd_name(enum explainable_cmd_type cmd) +const LEX_CSTRING* get_explainable_cmd_name(enum explainable_cmd_type cmd) { - return explainable_cmd_name[cmd]; + return explainable_cmd_name + cmd; } static @@ -28304,7 +28304,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) query_type); } if (sel_type == UPDATE_CMD || sel_type == DELETE_CMD) - str->append(STRING_WITH_LEN(get_explainable_cmd_name(sel_type))); + str->append(get_explainable_cmd_name(sel_type)); if (sel_type == DELETE_CMD) { str->append(STRING_WITH_LEN(" from ")); From 6a10468ed35167cbc5dc57c1091f8d8dcb572f0a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 15 Mar 2023 22:03:51 +0100 Subject: [PATCH 3/4] MDEV-13255 main.status failed in buildbot Test fails sporadically and very rarely on this: ``` let $org_queries= `SHOW STATUS LIKE 'Queries'`; SELECT f1(); CALL p1(); let $new_queries= `SHOW STATUS LIKE 'Queries'`; let $diff= `SELECT SUBSTRING('$new_queries',9)-SUBSTRING('$org_queries',9)`; ``` if COM_QUIT from one of the earlier (in the test) disconnect's happens between the two SHOW STATUS commands. Because COM_QUIT increments "Queries". The directly previous test uses wait_condition to wait for its disconnects to complete. But there are more disconnects earlier in the test file and nothing waits for them. Let's change wait_condition to wait for *all* disconnect to complete. --- mysql-test/main/status.test | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/mysql-test/main/status.test b/mysql-test/main/status.test index 221a24aedf4..ae1b90f9f7f 100644 --- a/mysql-test/main/status.test +++ b/mysql-test/main/status.test @@ -280,7 +280,6 @@ show status like 'Com%function'; # connect (root, localhost, root,,test); connection root; -let $root_connection_id= `select connection_id()`; --disable_warnings create database db37908; --enable_warnings @@ -296,7 +295,6 @@ delimiter ;| connect (user1,localhost,mysqltest_1,,test); connection user1; -let $user1_connection_id= `select connection_id()`; --error ER_TABLEACCESS_DENIED_ERROR select * from db37908.t1; @@ -315,11 +313,8 @@ drop procedure proc37908; drop function func37908; REVOKE ALL PRIVILEGES, GRANT OPTION FROM mysqltest_1@localhost; DROP USER mysqltest_1@localhost; -# Wait till the sessions user1 and root are disconnected -let $wait_condition = - SELECT COUNT(*) = 0 - FROM information_schema.processlist - WHERE id in ('$root_connection_id','$user1_connection_id'); +# Wait until all non-default sessions are disconnected +let $wait_condition = SELECT COUNT(*) = 1 FROM information_schema.processlist; --source include/wait_condition.inc # From 0a6343909fcf8b193a1b517b3a16eabd4ae89a83 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 1 Apr 2023 15:58:14 +0200 Subject: [PATCH 4/4] ensure that STRING_WITH_LEN is only used with string literals This is allowed: STRING_WITH_LEN("string literal") This is not: char *str = "pointer to string"; ... STRING_WITH_LEN(str) .. In C++ this is also allowed: const char str[] = "string literal"; ... STRING_WITH_LEN(str) ... --- include/m_string.h | 19 ++++++++++++++++--- sql/debug_sync.cc | 2 +- .../PerconaFT/portability/toku_debug_sync.h | 9 ++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/m_string.h b/include/m_string.h index 4a06b90c1aa..0133d81f6df 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -198,9 +198,22 @@ extern ulonglong strtoull(const char *str, char **ptr, int base); #include -#define STRING_WITH_LEN(X) (X), ((size_t) (sizeof(X) - 1)) -#define USTRING_WITH_LEN(X) ((uchar*) X), ((size_t) (sizeof(X) - 1)) -#define C_STRING_WITH_LEN(X) ((char *) (X)), ((size_t) (sizeof(X) - 1)) +#ifdef __cplusplus +#include +template inline const char *_swl_check(T s) +{ + static_assert(std::is_same::value + || std::is_same::value, + "Wrong argument for STRING_WITH_LEN()"); + return s; +} +#define STRING_WITH_LEN(X) _swl_check(X), ((size_t) (sizeof(X) - 1)) +#else +#define STRING_WITH_LEN(X) (X ""), ((size_t) (sizeof(X) - 1)) +#endif + +#define USTRING_WITH_LEN(X) (uchar*) STRING_WITH_LEN(X) +#define C_STRING_WITH_LEN(X) (char *) STRING_WITH_LEN(X) #define LEX_STRING_WITH_LEN(X) (X).str, (X).length typedef struct st_mysql_const_lex_string LEX_CSTRING; diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc index 16ff4abafe1..5cb8fb715bf 100644 --- a/sql/debug_sync.cc +++ b/sql/debug_sync.cc @@ -1292,7 +1292,7 @@ uchar *debug_sync_value_ptr(THD *thd) if (opt_debug_sync_timeout) { - static char on[]= "ON - current signal: '"; + static const char on[]= "ON - current signal: '"; // Ensure exclusive access to debug_sync_global.ds_signal mysql_mutex_lock(&debug_sync_global.ds_mutex); diff --git a/storage/tokudb/PerconaFT/portability/toku_debug_sync.h b/storage/tokudb/PerconaFT/portability/toku_debug_sync.h index affe3054214..ff99c99d81e 100644 --- a/storage/tokudb/PerconaFT/portability/toku_debug_sync.h +++ b/storage/tokudb/PerconaFT/portability/toku_debug_sync.h @@ -64,9 +64,12 @@ inline void toku_debug_sync(struct tokutxn *txn, const char *sync_point_name) { void *client_extra; THD *thd; - toku_txn_get_client_id(txn, &client_id, &client_extra); - thd = reinterpret_cast(client_extra); - DEBUG_SYNC(thd, sync_point_name); + if (debug_sync_service) + { + toku_txn_get_client_id(txn, &client_id, &client_extra); + thd = reinterpret_cast(client_extra); + debug_sync_service(thd, sync_point_name, strlen(sync_point_name)); + } } #else // defined(ENABLED_DEBUG_SYNC)