mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Tighten TAP tests' tracking of postmaster state some more.
Commits 6c4a8903b et al. had a couple of deficiencies:
* The logic I added to Cluster::start to see if a PID file is present
could be fooled by a stale PID file left over from a previous
postmaster.  To fix, if we're not sure whether we expect to find a
running postmaster or not, validate the PID using "kill 0".
* 017_shm.pl has a loop in which it just issues repeated Cluster::start
calls; this will fail if some invocation fails but leaves self->_pid
set.  Per buildfarm results, the above fix is not enough to make this
safe: we might have "validated" a PID for a postmaster that exits
immediately after we look.  Hence, match each failed start call with
a stop call that will get us back to the self->_pid == undef state.
Add a fail_ok option to Cluster::stop to make this work.
Discussion: https://postgr.es/m/CA+hUKGKV6fOHvfiPt8=dOKzvswjAyLoFoJF1iQXMNpi7+hD1JQ@mail.gmail.com
			
			
This commit is contained in:
		@@ -831,20 +831,37 @@ Note: if the node is already known stopped, this does nothing.
 | 
				
			|||||||
However, if we think it's running and it's not, it's important for
 | 
					However, if we think it's running and it's not, it's important for
 | 
				
			||||||
this to fail.  Otherwise, tests might fail to detect server crashes.
 | 
					this to fail.  Otherwise, tests might fail to detect server crashes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					With optional extra param fail_ok => 1, returns 0 for failure
 | 
				
			||||||
 | 
					instead of bailing out.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
=cut
 | 
					=cut
 | 
				
			||||||
 | 
					
 | 
				
			||||||
sub stop
 | 
					sub stop
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	my ($self, $mode) = @_;
 | 
						my ($self, $mode, %params) = @_;
 | 
				
			||||||
	my $port   = $self->port;
 | 
					 | 
				
			||||||
	my $pgdata = $self->data_dir;
 | 
						my $pgdata = $self->data_dir;
 | 
				
			||||||
	my $name   = $self->name;
 | 
						my $name   = $self->name;
 | 
				
			||||||
 | 
						my $ret;
 | 
				
			||||||
	$mode = 'fast' unless defined $mode;
 | 
						$mode = 'fast' unless defined $mode;
 | 
				
			||||||
	return unless defined $self->{_pid};
 | 
						return 1 unless defined $self->{_pid};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	print "### Stopping node \"$name\" using mode $mode\n";
 | 
						print "### Stopping node \"$name\" using mode $mode\n";
 | 
				
			||||||
	TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-m', $mode, 'stop');
 | 
						$ret = TestLib::system_log('pg_ctl', '-D', $pgdata,
 | 
				
			||||||
 | 
							'-m', $mode, 'stop');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if ($ret != 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							print "# pg_ctl stop failed: $ret\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							# Check to see if we still have a postmaster or not.
 | 
				
			||||||
 | 
							$self->_update_pid(-1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							BAIL_OUT("pg_ctl stop failed") unless $params{fail_ok};
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	$self->_update_pid(0);
 | 
						$self->_update_pid(0);
 | 
				
			||||||
	return;
 | 
						return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
=pod
 | 
					=pod
 | 
				
			||||||
@@ -1066,9 +1083,20 @@ sub _update_pid
 | 
				
			|||||||
	if (open my $pidfile, '<', $self->data_dir . "/postmaster.pid")
 | 
						if (open my $pidfile, '<', $self->data_dir . "/postmaster.pid")
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		chomp($self->{_pid} = <$pidfile>);
 | 
							chomp($self->{_pid} = <$pidfile>);
 | 
				
			||||||
		print "# Postmaster PID for node \"$name\" is $self->{_pid}\n";
 | 
					 | 
				
			||||||
		close $pidfile;
 | 
							close $pidfile;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							# If we aren't sure what to expect, validate the PID using kill().
 | 
				
			||||||
 | 
							# This protects against stale PID files left by crashed postmasters.
 | 
				
			||||||
 | 
							if ($is_running == -1 && kill(0, $self->{_pid}) == 0)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								print
 | 
				
			||||||
 | 
								  "# Stale postmaster.pid file for node \"$name\": PID $self->{_pid} no longer exists\n";
 | 
				
			||||||
 | 
								$self->{_pid} = undef;
 | 
				
			||||||
 | 
								return;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							print "# Postmaster PID for node \"$name\" is $self->{_pid}\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		# If we found a pidfile when there shouldn't be one, complain.
 | 
							# If we found a pidfile when there shouldn't be one, complain.
 | 
				
			||||||
		BAIL_OUT("postmaster.pid unexpectedly present") if $is_running == 0;
 | 
							BAIL_OUT("postmaster.pid unexpectedly present") if $is_running == 0;
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -204,6 +204,9 @@ sub poll_start
 | 
				
			|||||||
		# Wait 0.1 second before retrying.
 | 
							# Wait 0.1 second before retrying.
 | 
				
			||||||
		usleep(100_000);
 | 
							usleep(100_000);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							# Clean up in case the start attempt just timed out or some such.
 | 
				
			||||||
 | 
							$node->stop('fast', fail_ok => 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		$attempts++;
 | 
							$attempts++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user