diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 9d0f83cde31..6da58437c58 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1687,10 +1687,8 @@ dropdb(const char *dbname, bool missing_ok, bool force) */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); -#if defined(USE_BARRIER_SMGRRELEASE) /* Close all smgr fds in all backends. */ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); -#endif /* * Remove all tablespace subdirs belonging to the database. */ @@ -1940,10 +1938,8 @@ movedb(const char *dbname, const char *tblspcname) RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); -#if defined(USE_BARRIER_SMGRRELEASE) /* Close all smgr fds in all backends. */ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); -#endif /* * Now drop all buffers holding data of the target database; they should @@ -3054,6 +3050,9 @@ dbase_redo(XLogReaderState *record) */ FlushDatabaseBuffers(xlrec->src_db_id); + /* Close all smgr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + /* * Copy this subdirectory to the new location * @@ -3111,10 +3110,8 @@ dbase_redo(XLogReaderState *record) /* Clean out the xlog relcache too */ XLogDropDatabase(xlrec->db_id); -#if defined(USE_BARRIER_SMGRRELEASE) /* Close all sgmr fds in all backends. */ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); -#endif for (i = 0; i < xlrec->ntablespaces; i++) { diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 40514ab550f..822d65287ef 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -548,11 +548,10 @@ DropTableSpace(DropTableSpaceStmt *stmt) * use a global barrier to ask all backends to close all files, and * wait until they're finished. 
*/ -#if defined(USE_BARRIER_SMGRRELEASE) LWLockRelease(TablespaceCreateLock); WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE); -#endif + /* And now try again. */ if (!destroy_tablespace_directories(tablespaceoid, false)) { @@ -1574,6 +1573,9 @@ tblspc_redo(XLogReaderState *record) { xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record); + /* Close all smgr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + /* * If we issued a WAL record for a drop tablespace it implies that * there were no files in it at all when the DROP was done. That means @@ -1591,11 +1593,6 @@ tblspc_redo(XLogReaderState *record) */ if (!destroy_tablespace_directories(xlrec->ts_id, true)) { -#if defined(USE_BARRIER_SMGRRELEASE) - /* Close all smgr fds in all backends. */ - WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); -#endif - ResolveRecoveryConflictWithTablespace(xlrec->ts_id); /* diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 84ce5a4a5d7..8d2e3e3a57d 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -152,17 +152,6 @@ #define EXEC_BACKEND #endif -/* - * If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink - * directories will ask other backends to close all smgr file descriptors. - * This is enabled on Windows, because otherwise unlinked but still open files - * can prevent rmdir(containing_directory) from succeeding. On other - * platforms, it can be defined to exercise those code paths. 
- */ -#if defined(WIN32) -#define USE_BARRIER_SMGRRELEASE -#endif - /* * Define this if your operating system supports link() */ diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile index da5b9ff397c..c47eee273b7 100644 --- a/src/test/recovery/Makefile +++ b/src/test/recovery/Makefile @@ -9,7 +9,7 @@ # #------------------------------------------------------------------------- -EXTRA_INSTALL=contrib/test_decoding +EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm subdir = src/test/recovery top_builddir = ../../.. diff --git a/src/test/recovery/t/032_relfilenode_reuse.pl b/src/test/recovery/t/032_relfilenode_reuse.pl new file mode 100644 index 00000000000..22d8e85614c --- /dev/null +++ b/src/test/recovery/t/032_relfilenode_reuse.pl @@ -0,0 +1,233 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Basename; + + +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf('postgresql.conf', q[ +allow_in_place_tablespaces = true +log_connections=on +# to avoid "repairing" corruption +full_page_writes=off +log_min_messages=debug2 +autovacuum_naptime=1s +shared_buffers=1MB +]); +$node_primary->start; + + +# Create streaming standby linking to primary +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->start; + +# To avoid hanging while expecting some specific input from a psql +# instance being driven by us, add a timeout high enough that it +# should never trigger even on very slow machines, unless something +# is really wrong. 
+my $psql_timeout = IPC::Run::timer(300); + +my %psql_primary = (stdin => '', stdout => '', stderr => ''); +$psql_primary{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ], + '<', + \$psql_primary{stdin}, + '>', + \$psql_primary{stdout}, + '2>', + \$psql_primary{stderr}, + $psql_timeout); + +my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => ''); +$psql_standby{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ], + '<', + \$psql_standby{stdin}, + '>', + \$psql_standby{stdout}, + '2>', + \$psql_standby{stderr}, + $psql_timeout); + + +# Create template database with a table that we'll update, to trigger dirty +# rows. Using a template database + preexisting rows makes it a bit easier to +# reproduce, because there's no cache invalidations generated. + +$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db_template OID = 50000;"); +$node_primary->safe_psql('conflict_db_template', q[ + CREATE TABLE large(id serial primary key, dataa text, datab text); + INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]); +$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;"); + +$node_primary->safe_psql('postgres', q[ + CREATE EXTENSION pg_prewarm; + CREATE TABLE replace_sb(data text); + INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]); + +# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files +send_query_and_wait( + \%psql_primary, + q[BEGIN;], + qr/BEGIN/m); +send_query_and_wait( + \%psql_standby, + q[BEGIN;], + qr/BEGIN/m); + +# Cause lots of dirty rows in shared_buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 1;"); + +# Now do a bunch of work in another database. 
That will end up needing to +# write back dirty data from the previous step, opening the relevant file +# descriptors +cause_eviction(\%psql_primary, \%psql_standby); + +# drop and recreate database +$node_primary->safe_psql('postgres', "DROP DATABASE conflict_db;"); +$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;"); + +verify($node_primary, $node_standby, 1, + "initial contents as expected"); + +# Again cause lots of dirty rows in shared_buffers, but use a different update +# value so we can check everything is OK +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 2;"); + +# Again cause a lot of IO. That'll again write back dirty data, but uses (XXX +# adjust after bugfix) the already opened file descriptor. +# FIXME +cause_eviction(\%psql_primary, \%psql_standby); + +verify($node_primary, $node_standby, 2, + "update to reused relfilenode (due to DB oid conflict) is not lost"); + + +$node_primary->safe_psql('conflict_db', "VACUUM FULL large;"); +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 3;"); + +verify($node_primary, $node_standby, 3, + "restored contents as expected"); + +# Test for old filehandles after moving a database in / out of tablespace +$node_primary->safe_psql('postgres', q[CREATE TABLESPACE test_tablespace LOCATION '']); + +# cause dirty buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 4;"); +# cause files to be opened in backend in other database +cause_eviction(\%psql_primary, \%psql_standby); + +# move database back / forth +$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace'); +$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE pg_default'); + +# cause dirty buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 5;"); +cause_eviction(\%psql_primary, \%psql_standby); + +verify($node_primary, $node_standby, 5, + "post move contents 
as expected"); + +$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace'); + +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 7;"); +cause_eviction(\%psql_primary, \%psql_standby); +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 8;"); +$node_primary->safe_psql('postgres', 'DROP DATABASE conflict_db'); +$node_primary->safe_psql('postgres', 'DROP TABLESPACE test_tablespace'); + +$node_primary->safe_psql('postgres', 'REINDEX TABLE pg_database'); + + +# explicitly shut down psql instances gracefully - to avoid hangs +# or worse on windows +$psql_primary{stdin} .= "\\q\n"; +$psql_primary{run}->finish; +$psql_standby{stdin} .= "\\q\n"; +$psql_standby{run}->finish; + +$node_primary->stop(); +$node_standby->stop(); + +# Make sure that there weren't crashes during shutdown + +command_like([ 'pg_controldata', $node_primary->data_dir ], + qr/Database cluster state:\s+shut down\n/, 'primary shut down ok'); +command_like([ 'pg_controldata', $node_standby->data_dir ], + qr/Database cluster state:\s+shut down in recovery\n/, 'standby shut down ok'); +done_testing(); + +sub verify +{ + my ($primary, $standby, $counter, $message) = @_; + + my $query = "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10"; + is($primary->safe_psql('conflict_db', $query), + "$counter|4000", + "primary: $message"); + + $primary->wait_for_catchup($standby); + is($standby->safe_psql('conflict_db', $query), + "$counter|4000", + "standby: $message"); +} + +sub cause_eviction +{ + my ($psql_primary, $psql_standby) = @_; + + send_query_and_wait( + $psql_primary, + q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;], + qr/warmed_buffers/m); + + send_query_and_wait( + $psql_standby, + q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;], + qr/warmed_buffers/m); +} + +# Send query, wait until string matches +sub 
send_query_and_wait +{ + my ($psql, $query, $untl) = @_; + my $ret; + + # send query + $$psql{stdin} .= $query; + $$psql{stdin} .= "\n"; + + # wait for query results + $$psql{run}->pump_nb(); + while (1) + { + last if $$psql{stdout} =~ /$untl/; + + if ($psql_timeout->is_expired) + { + BAIL_OUT("aborting wait: program timed out\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + if (not $$psql{run}->pumpable()) + { + BAIL_OUT("aborting wait: program died\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + $$psql{run}->pump(); + } + + $$psql{stdout} = ''; + + return 1; +}