From a49b9314c16fe60b6056b50405d28eff69eed868 Mon Sep 17 00:00:00 2001 From: Aleksey Midenkov Date: Mon, 7 Aug 2023 18:49:47 +0300 Subject: [PATCH] MDEV-30836 MTR hangs after tests have completed The problem is in manager/worker communication when the worker sends WARNINGS and then TESTRESULT. If the manager has not yet read the WARNINGS response, both responses end up in the same buffer; can_read() will indicate that data is available only once, so we must read all the data from the socket at once. Otherwise the TESTRESULT response is lost and the manager waits for it forever. The fix now reads the socket in a loop instead of reading a single line. But if there is only one response in the buffer, the second read will block, waiting until new data arrives. That can be overcome by blocking(0), which sets the handle into non-blocking mode. If there is no data, the second read just returns undef. The problem is that non-blocking mode is not supported by all perl flavors on Windows. Strawberry and ActiveState do not support it. Cygwin and MSYS2 do support it. There is some ioctl() hack that was known to "work" but it doesn't do what is expected (it does not return data when there is data). So on Windows, if it is not Cygwin, we disable the fix. --- mysql-test/mysql-test-run.pl | 54 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 3c11c0ee4cf..40b7a051735 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -643,7 +643,7 @@ sub parse_protocol($$) { { mtr_verbose2("${line}: saying BYE to ". 
$names{$sock}); print $sock "BYE\n"; - return 1; + return 2; } return ["Failure", 1, $completed, $extra_warnings]; } @@ -692,7 +692,7 @@ sub parse_protocol($$) { $result->write_test($sock, 'TESTCASE'); } push(@$completed, $result); - return 1; + return 2; } } @@ -706,7 +706,7 @@ sub parse_protocol($$) { delete($result->{result}); $result->{repeat}= $repeat+1; $result->write_test($sock, 'TESTCASE'); - return 1; + return 2; } # Remove from list of running @@ -779,7 +779,7 @@ sub run ($$$) { } main::mark_time_idle(); my $i= 0; - foreach my $sock (@ready) { + sock_loop: foreach my $sock (@ready) { ++$i; if ($sock == $server) { # New client connected @@ -790,23 +790,37 @@ sub run ($$$) { print $child "HELLO\n"; } else { - my $line= <$sock>; - if (!defined $line) { - # Client disconnected - --$childs; - mtr_verbose2((exists $names{$sock} ? $names{$sock} : "Worker"). " closed socket (left ${childs} childs)"); - $s->remove($sock); - $sock->close; - next; - } - chomp($line); - mtr_verbose2("Connection ${i}". (exists $names{$sock} ? " from $names{$sock}" : "") .": $line"); + my $j= 0; + $sock->blocking(0); + while (my $line= <$sock>) { + ++$j; + chomp($line); + mtr_verbose2("Connection ${i}.${j}". (exists $names{$sock} ? " from $names{$sock}" : "") .": $line"); - my $res= parse_protocol($sock, $line); - if (ref $res eq 'ARRAY') { - return @$res; - } elsif ($res == 1) { - next; + $sock->blocking(1); + my $res= parse_protocol($sock, $line); + $sock->blocking(0); + if (ref $res eq 'ARRAY') { + return @$res; + } elsif ($res == 1) { + next; + } elsif ($res == 2) { + next sock_loop; + } + if (IS_WINDOWS and !IS_CYGWIN) { + # Strawberry and ActiveState don't support blocking(0), the next iteration will be blocked! + # If there is next response now in the buffer and it is TESTRESULT we are affected by MDEV-30836 and the manager will hang. + last; + } + } + $sock->blocking(1); + if ($j == 0) { + # Client disconnected + --$childs; + mtr_verbose2((exists $names{$sock} ? 
$names{$sock} : "Worker"). " closed socket (left ${childs} childs)"); + $s->remove($sock); + $sock->close; + next; } # Find next test to schedule