From 642195d255619854d13f20b9937b604dbbe2a5fb Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Thu, 12 Sep 2024 19:00:26 +0200 Subject: [PATCH] MDEV-34234: SST hangs when running on unprivileged containers on RHEL9 The lsof utility is prone to blocking on system calls that it uses to obtain information about sockets (or files, devices, etc.). This behavior is described in its own documentation. It has a '-b' option (in combination with warnings suppression via '-w') that reduces the probability of blocking, although it introduces new problems (luckily probably not relevant for our use case). However, there is no guarantee that it will not hang on some distributions, with some TCP/IP stack implementations, or with some filesystems, etc. Also, of the three utilities that are suitable for our purposes, lsof is the slowest. So if there are other utilities that we use during SST, such as 'ss' or 'sockstat', it is reasonable to use them instead of lsof. This commit changes the prioritization of utilities; it does not need additional tests (besides the numerous SST tests already available in the galera suites). If the system still needs to use lsof, this commit adds the '-b' and '-w' options to its command line to reduce the likelihood of blocking. 
--- scripts/wsrep_sst_common.sh | 40 +++++++++++++++++++++++-------------- scripts/wsrep_sst_rsync.sh | 32 +++++++++++++++++------------ 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh index 581f804d2d3..43732d7b49f 100644 --- a/scripts/wsrep_sst_common.sh +++ b/scripts/wsrep_sst_common.sh @@ -1224,14 +1224,18 @@ check_sockets_utils() sockstat_available=0 ss_available=0 - [ -n "$(commandex lsof)" ] && lsof_available=1 - [ -n "$(commandex sockstat)" ] && sockstat_available=1 - [ -n "$(commandex ss)" ] && ss_available=1 - - if [ $lsof_available -eq 0 -a \ - $sockstat_available -eq 0 -a \ - $ss_available -eq 0 ] - then + # The presence of any of these utilities is enough for us: + if [ -n "$(commandex ss)" ]; then + ss_available=1 + elif [ -n "$(commandex sockstat)" ]; then + sockstat_available=1 + elif [ -n "$(commandex lsof)" ]; then + lsof_available=1 + # Let's check that lsof has an option to bypass blocking: + if lsof -h 2>&1 | grep -qw -F -- '-b'; then + lsof_available=2 + fi + else wsrep_log_error "Neither lsof, nor sockstat or ss tool was found in" \ "the PATH. Make sure you have it installed." 
exit 2 # ENOENT @@ -1258,9 +1262,9 @@ check_port() local rc=1 - if [ $lsof_available -ne 0 ]; then - lsof -Pnl -i ":$port" 2>/dev/null | \ - grep -q -E "^($utils)[^[:space:]]*[[:space:]]+$pid[[:space:]].*\\(LISTEN\\)" && rc=0 + if [ $ss_available -ne 0 ]; then + ss -nlpH "( sport = :$port )" 2>/dev/null | \ + grep -q -E "users:\\(.*\\(\"($utils)[^[:space:]]*\"[^)]*,pid=$pid(,[^)]*)?\\)" && rc=0 elif [ $sockstat_available -ne 0 ]; then local opts='-p' if [ "$OS" = 'FreeBSD' ]; then @@ -1268,11 +1272,17 @@ check_port() # to display the connection state: opts='-sp' fi - sockstat "$opts" "$port" 2>/dev/null | \ + sockstat $opts "$port" 2>/dev/null | \ grep -q -E "[[:space:]]+($utils)[^[:space:]]*[[:space:]]+$pid[[:space:]].*[[:space:]]LISTEN" && rc=0 - elif [ $ss_available -ne 0 ]; then - ss -nlpH "( sport = :$port )" 2>/dev/null | \ - grep -q -E "users:\\(.*\\(\"($utils)[^[:space:]]*\"[^)]*,pid=$pid(,[^)]*)?\\)" && rc=0 + elif [ $lsof_available -ne 0 ]; then + local lsof_opts='-Pnl' + if [ $lsof_available -gt 1 ]; then + lsof_opts="$lsof_opts -b -w" + else + lsof_opts="$lsof_opts -S 15" + fi + lsof $lsof_opts -i ":$port" 2>/dev/null | \ + grep -q -E "^($utils)[^[:space:]]*[[:space:]]+$pid[[:space:]].*\\(LISTEN\\)" && rc=0 else wsrep_log_error "Unknown sockets utility" exit 2 # ENOENT diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index c22d0b3e494..c5640d2a22c 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -96,29 +96,35 @@ check_pid_and_port() local port_info local busy=0 - if [ $lsof_available -ne 0 ]; then - port_info=$(lsof -Pnl -i ":$port" 2>/dev/null | grep -F '(LISTEN)') - echo "$port_info" | \ - grep -q -E "[[:space:]]\\[?(\\*|[[:xdigit:]]*(:[[:xdigit:]]*)+)(\\](%[^:]+)?)?:$port[[:space:]]" && busy=1 - else - local filter='([^[:space:]]+[[:space:]]+){4}[^[:space:]]+' - if [ $sockstat_available -ne 0 ]; then + if [ $ss_available -ne 0 -o $sockstat_available -ne 0 ]; then + if [ $ss_available -ne 0 ]; then + 
port_info=$(ss -nlpH "( sport = :$port )" 2>/dev/null | \ + grep -F 'users:(' | grep -o -E "([^[:space:]]+[[:space:]]+){4}[^[:space:]]+") + else local opts='-p' + local terms=4 if [ "$OS" = 'FreeBSD' ]; then # sockstat on FreeBSD requires the "-s" option # to display the connection state: opts='-sp' # in addition, sockstat produces an additional column: - filter='([^[:space:]]+[[:space:]]+){5}[^[:space:]]+' + terms=5 fi - port_info=$(sockstat "$opts" "$port" 2>/dev/null | \ - grep -E '[[:space:]]LISTEN' | grep -o -E "$filter") - else - port_info=$(ss -nlpH "( sport = :$port )" 2>/dev/null | \ - grep -F 'users:(' | grep -o -E "$filter") + port_info=$(sockstat $opts "$port" 2>/dev/null | \ + grep -E '[[:space:]]LISTEN' | grep -o -E "([^[:space:]]+[[:space:]]+){$terms}[^[:space:]]+") fi echo "$port_info" | \ grep -q -E "[[:space:]]\\[?(\\*|[[:xdigit:]]*(:[[:xdigit:]]*)+)(\\](%[^:]+)?)?:$port\$" && busy=1 + else + local lsof_opts='-Pnl' + if [ $lsof_available -gt 1 ]; then + lsof_opts="$lsof_opts -b -w" + else + lsof_opts="$lsof_opts -S 15" + fi + port_info=$(lsof $lsof_opts -i ":$port" 2>/dev/null | grep -F '(LISTEN)' || :) + echo "$port_info" | \ + grep -q -E "[[:space:]]\\[?(\\*|[[:xdigit:]]*(:[[:xdigit:]]*)+)(\\](%[^:]+)?)?:$port[[:space:]]" && busy=1 fi if [ $busy -eq 0 ]; then