diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index c94cf34e698..82378db441a 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -173,7 +173,7 @@ blbuildempty(Relation index)
* Write the page and log it. It might seem that an immediate sync would
* be sufficient to guarantee that the file exists on disk, but recovery
* itself might remove it while replaying, for example, an
- * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
+ * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
* this even when wal_level=minimal.
*/
PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 6a6b09dc456..3b9172f65bd 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1502,6 +1502,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
TwophaseFileWriteWaiting for a write of a two phase state file.
+
+ VersionFileWrite
+ Waiting for the version file to be written while creating a database.
+ WALBootstrapSyncWaiting for WAL to reach durable storage during
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index 5ae785ab95a..255ad3a1ce0 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -25,6 +25,7 @@ CREATE DATABASE name
[ [ WITH ] [ OWNER [=] user_name ]
[ TEMPLATE [=] template ]
[ ENCODING [=] encoding ]
+ [ STRATEGY [=] strategy ]
[ LOCALE [=] locale ]
[ LC_COLLATE [=] lc_collate ]
[ LC_CTYPE [=] lc_ctype ]
@@ -118,6 +119,27 @@ CREATE DATABASE name
+
+ strategy
+
+
+ Strategy to be used in creating the new database. If
+ the WAL_LOG strategy is used, the database will be
+ copied block by block and each block will be separately written
+ to the write-ahead log. This is the most efficient strategy in
+ cases where the template database is small, and therefore it is the
+ default. The older FILE_COPY strategy is also
+ available. This strategy writes a small record to the write-ahead log
+ for each tablespace used by the target database. Each such record
+ represents copying an entire directory to a new location at the
+ filesystem level. While this does reduce the write-ahead
+ log volume substantially, especially if the template database is large,
+ it also forces the system to perform a checkpoint both before and
+ after the creation of the new database. In some situations, this may
+ have a noticeable negative impact on overall system performance.
+
+
+ locale
diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml
index be42e502d69..671cd362d94 100644
--- a/doc/src/sgml/ref/createdb.sgml
+++ b/doc/src/sgml/ref/createdb.sgml
@@ -177,6 +177,17 @@ PostgreSQL documentation
+
+
+
+
+
+ Specifies the database creation strategy. See
+ for more details.
+
+
+
+
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 39ef8a0b77d..dee264e8596 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -593,7 +593,7 @@ heapam_relation_set_new_filenode(Relation rel,
*/
*minmulti = GetOldestMultiXactId();
- srel = RelationCreateStorage(*newrnode, persistence);
+ srel = RelationCreateStorage(*newrnode, persistence, true);
/*
* If required, set up an init fork for an unlogged table so that it can
@@ -601,7 +601,7 @@ heapam_relation_set_new_filenode(Relation rel,
* even if the page has been logged, because the write did not go through
* shared_buffers and therefore a concurrent checkpoint may have moved the
* redo pointer past our xlog record. Recovery may as well remove it
- * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
+ * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
* record. Therefore, logging is necessary even if wal_level=minimal.
*/
if (persistence == RELPERSISTENCE_UNLOGGED)
@@ -645,7 +645,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
- RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
+ RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence, true);
/* copy main fork */
RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c9b4964c1e8..dacf3f7a587 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -161,7 +161,7 @@ btbuildempty(Relation index)
* Write the page and log it. It might seem that an immediate sync would
* be sufficient to guarantee that the file exists on disk, but recovery
* itself might remove it while replaying, for example, an
- * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
+ * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
* this even when wal_level=minimal.
*/
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c
index 03af3fdbcfd..523d0b3c1da 100644
--- a/src/backend/access/rmgrdesc/dbasedesc.c
+++ b/src/backend/access/rmgrdesc/dbasedesc.c
@@ -24,14 +24,23 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
char *rec = XLogRecGetData(record);
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
- if (info == XLOG_DBASE_CREATE)
+ if (info == XLOG_DBASE_CREATE_FILE_COPY)
{
- xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) rec;
+ xl_dbase_create_file_copy_rec *xlrec =
+ (xl_dbase_create_file_copy_rec *) rec;
appendStringInfo(buf, "copy dir %u/%u to %u/%u",
xlrec->src_tablespace_id, xlrec->src_db_id,
xlrec->tablespace_id, xlrec->db_id);
}
+ else if (info == XLOG_DBASE_CREATE_WAL_LOG)
+ {
+ xl_dbase_create_wal_log_rec *xlrec =
+ (xl_dbase_create_wal_log_rec *) rec;
+
+ appendStringInfo(buf, "create dir %u/%u",
+ xlrec->tablespace_id, xlrec->db_id);
+ }
else if (info == XLOG_DBASE_DROP)
{
xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) rec;
@@ -51,8 +60,11 @@ dbase_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_DBASE_CREATE:
- id = "CREATE";
+ case XLOG_DBASE_CREATE_FILE_COPY:
+ id = "CREATE_FILE_COPY";
+ break;
+ case XLOG_DBASE_CREATE_WAL_LOG:
+ id = "CREATE_WAL_LOG";
break;
case XLOG_DBASE_DROP:
id = "DROP";
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 511f2f186f5..a4dedc58b71 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -484,7 +484,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
{
/* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
- mode, NULL);
+ mode, NULL, true);
}
else
{
@@ -509,7 +509,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
ReleaseBuffer(buffer);
}
buffer = ReadBufferWithoutRelcache(rnode, forknum,
- P_NEW, mode, NULL);
+ P_NEW, mode, NULL, true);
}
while (BufferGetBlockNumber(buffer) < blkno);
/* Handle the corner case that P_NEW returns non-consecutive pages */
@@ -519,7 +519,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
- mode, NULL);
+ mode, NULL, true);
}
}
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 696fd5977e0..6eb78a9c0ff 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -387,7 +387,7 @@ heap_create(const char *relname,
relpersistence,
relfrozenxid, relminmxid);
else if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
- RelationCreateStorage(rel->rd_node, relpersistence);
+ RelationCreateStorage(rel->rd_node, relpersistence, true);
else
Assert(false);
}
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index ce5568ff084..9898701a438 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -112,12 +112,14 @@ AddPendingSync(const RelFileNode *rnode)
* modules that need them.
*
* This function is transactional. The creation is WAL-logged, and if the
- * transaction aborts later on, the storage will be destroyed.
+ * transaction aborts later on, the storage will be destroyed. A caller
+ * that does not want the storage to be destroyed in case of an abort may
+ * pass register_delete = false.
*/
SMgrRelation
-RelationCreateStorage(RelFileNode rnode, char relpersistence)
+RelationCreateStorage(RelFileNode rnode, char relpersistence,
+ bool register_delete)
{
- PendingRelDelete *pending;
SMgrRelation srel;
BackendId backend;
bool needs_wal;
@@ -149,15 +151,23 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
if (needs_wal)
log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM);
- /* Add the relation to the list of stuff to delete at abort */
- pending = (PendingRelDelete *)
- MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
- pending->relnode = rnode;
- pending->backend = backend;
- pending->atCommit = false; /* delete if abort */
- pending->nestLevel = GetCurrentTransactionNestLevel();
- pending->next = pendingDeletes;
- pendingDeletes = pending;
+ /*
+ * Add the relation to the list of stuff to delete at abort, if we are
+ * asked to do so.
+ */
+ if (register_delete)
+ {
+ PendingRelDelete *pending;
+
+ pending = (PendingRelDelete *)
+ MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
+ pending->relnode = rnode;
+ pending->backend = backend;
+ pending->atCommit = false; /* delete if abort */
+ pending->nestLevel = GetCurrentTransactionNestLevel();
+ pending->next = pendingDeletes;
+ pendingDeletes = pending;
+ }
if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
{
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 623e5ec7789..df16533901e 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -63,13 +63,31 @@
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/pg_locale.h"
+#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
+/*
+ * Create database strategy.
+ *
+ * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each
+ * copied block.
+ *
+ * CREATEDB_FILE_COPY will simply perform a file system level copy of the
+ * database and log a single record for each tablespace copied. To make this
+ * safe, it also triggers checkpoints before and after the operation.
+ */
+typedef enum CreateDBStrategy
+{
+ CREATEDB_WAL_LOG,
+ CREATEDB_FILE_COPY
+} CreateDBStrategy;
+
typedef struct
{
Oid src_dboid; /* source (template) DB */
Oid dest_dboid; /* DB we are trying to create */
+ CreateDBStrategy strategy; /* create db strategy */
} createdb_failure_params;
typedef struct
@@ -78,6 +96,17 @@ typedef struct
Oid dest_tsoid; /* tablespace we are trying to move to */
} movedb_failure_params;
+/*
+ * Information about a relation to be copied when creating a database.
+ */
+typedef struct CreateDBRelInfo
+{
+ RelFileNode rnode; /* physical relation identifier */
+ Oid reloid; /* relation oid */
+ bool permanent; /* relation is permanent or unlogged */
+} CreateDBRelInfo;
+
+
/* non-export function prototypes */
static void createdb_failure_callback(int code, Datum arg);
static void movedb(const char *dbname, const char *tblspcname);
@@ -93,7 +122,546 @@ static bool have_createdb_privilege(void);
static void remove_dbtablespaces(Oid db_id);
static bool check_db_file_conflict(Oid db_id);
static int errdetail_busy_db(int notherbackends, int npreparedxacts);
+static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dboid, Oid src_tsid,
+ Oid dst_tsid);
+static List *ScanSourceDatabasePgClass(Oid srctbid, Oid srcdbid, char *srcpath);
+static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
+ Oid dbid, char *srcpath,
+ List *rnodelist, Snapshot snapshot);
+static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
+ Oid tbid, Oid dbid,
+ char *srcpath);
+static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
+ bool isRedo);
+static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
+ Oid dst_tsid);
+/*
+ * Create a new database using the WAL_LOG strategy.
+ *
+ * Each copied block is separately written to the write-ahead log.
+ *
+ * src_dboid and src_tsid identify the source (template) database and its
+ * default tablespace; dst_dboid and dst_tsid identify the database being
+ * created and its default tablespace.
+ */
+static void
+CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid,
+						  Oid src_tsid, Oid dst_tsid)
+{
+	char	   *srcpath;
+	char	   *dstpath;
+	List	   *rnodelist = NIL;
+	ListCell   *cell;
+	LockRelId	srcrelid;
+	LockRelId	dstrelid;
+	RelFileNode srcrnode;
+	RelFileNode dstrnode;
+	CreateDBRelInfo *relinfo;
+
+	/* Get source and destination database paths. */
+	srcpath = GetDatabasePath(src_dboid, src_tsid);
+	dstpath = GetDatabasePath(dst_dboid, dst_tsid);
+
+	/* Create database directory and write PG_VERSION file. */
+	CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false);
+
+	/* Copy relmap file from source database to the destination database. */
+	RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath);
+
+	/* Get list of relfilenodes to copy from the source database. */
+	rnodelist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath);
+	Assert(rnodelist != NIL);
+
+	/*
+	 * Database IDs will be the same for all relations so set them before
+	 * entering the loop.
+	 */
+	srcrelid.dbId = src_dboid;
+	dstrelid.dbId = dst_dboid;
+
+	/* Loop over our list of relfilenodes and copy each one. */
+	foreach(cell, rnodelist)
+	{
+		relinfo = lfirst(cell);
+		srcrnode = relinfo->rnode;
+
+		/*
+		 * If the relation is from the source db's default tablespace then we
+		 * need to create it in the destination db's default tablespace.
+		 * Otherwise, we need to create in the same tablespace as it is in the
+		 * source database.
+		 */
+		if (srcrnode.spcNode == src_tsid)
+			dstrnode.spcNode = dst_tsid;
+		else
+			dstrnode.spcNode = srcrnode.spcNode;
+
+		dstrnode.dbNode = dst_dboid;
+		dstrnode.relNode = srcrnode.relNode;
+
+		/*
+		 * Acquire locks on source and target relations before copying.
+		 *
+		 * We typically do not read relation data into shared_buffers without
+		 * holding a relation lock. It's unclear what could go wrong if we
+		 * skipped it in this case, because nobody can be modifying either
+		 * the source or destination database at this point, and we have locks
+		 * on both databases, too, but let's take the conservative route.
+		 */
+		dstrelid.relId = srcrelid.relId = relinfo->reloid;
+		LockRelationId(&srcrelid, AccessShareLock);
+		LockRelationId(&dstrelid, AccessShareLock);
+
+		/* Copy relation storage from source to the destination. */
+		CreateAndCopyRelationData(srcrnode, dstrnode, relinfo->permanent);
+
+		/* Release the relation locks. */
+		UnlockRelationId(&srcrelid, AccessShareLock);
+		UnlockRelationId(&dstrelid, AccessShareLock);
+	}
+
+	/* Release the path strings and the per-relation list built above. */
+	pfree(srcpath);
+	pfree(dstpath);
+	list_free_deep(rnodelist);
+}
+
+/*
+ * Scan the pg_class table in the source database to identify the relations
+ * that need to be copied to the destination database.
+ *
+ * This is an exception to the usual rule that cross-database access is
+ * not possible. We can make it work here because we know that there are no
+ * connections to the source database and (since there can't be prepared
+ * transactions touching that database) no in-doubt tuples either. This
+ * means that we don't need to worry about pruning removing anything from
+ * under us, and we don't need to be too picky about our snapshot either.
+ * As long as it sees all previously-committed XIDs as committed and all
+ * aborted XIDs as aborted, we should be fine: nothing else is possible
+ * here.
+ *
+ * We can't rely on the relcache for anything here, because that only knows
+ * about the database to which we are connected, and can't handle access to
+ * other databases. That also means we can't rely on the heap scan
+ * infrastructure, which would be a bad idea anyway since it might try
+ * to do things like HOT pruning which we definitely can't do safely in
+ * a database to which we're not even connected.
+ *
+ * tbid is the source database's default tablespace, dbid its OID, and
+ * srcpath its directory.  The result is a list of CreateDBRelInfo pointers.
+ */
+static List *
+ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
+{
+	RelFileNode rnode;
+	BlockNumber nblocks;
+	BlockNumber blkno;
+	Oid			relfilenode;
+	List	   *rnodelist = NIL;
+	LockRelId	relid;
+	Relation	rel;
+	Snapshot	snapshot;
+	BufferAccessStrategy bstrategy;
+
+	/* Get pg_class relfilenode. */
+	relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
+													  RelationRelationId);
+
+	/* Don't read data into shared_buffers without holding a relation lock. */
+	relid.dbId = dbid;
+	relid.relId = RelationRelationId;
+	LockRelationId(&relid, AccessShareLock);
+
+	/* Prepare a RelFileNode for the pg_class relation. */
+	rnode.spcNode = tbid;
+	rnode.dbNode = dbid;
+	rnode.relNode = relfilenode;
+
+	/*
+	 * We can't use a real relcache entry for a relation in some other
+	 * database, but since we're only going to access the fields related
+	 * to physical storage, a fake one is good enough. If we didn't do this
+	 * and used the smgr layer directly, we would have to worry about
+	 * invalidations.
+	 */
+	rel = CreateFakeRelcacheEntry(rnode);
+	nblocks = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
+	FreeFakeRelcacheEntry(rel);
+
+	/* Use a buffer access strategy since this is a bulk read operation. */
+	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+	/*
+	 * As explained in the function header comments, we need a snapshot that
+	 * will see all committed transactions as committed, and our transaction
+	 * snapshot - or the active snapshot - might not be new enough for that,
+	 * but the return value of GetLatestSnapshot() should work fine.
+	 */
+	snapshot = GetLatestSnapshot();
+
+	/* Process the relation block by block. */
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		Buffer		buf;
+		Page		page;
+
+		CHECK_FOR_INTERRUPTS();
+
+		buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno,
+										RBM_NORMAL, bstrategy, false);
+
+		LockBuffer(buf, BUFFER_LOCK_SHARE);
+		page = BufferGetPage(buf);
+		if (PageIsNew(page) || PageIsEmpty(page))
+		{
+			UnlockReleaseBuffer(buf);
+			continue;
+		}
+
+		/* Append relevant pg_class tuples for current page to rnodelist. */
+		rnodelist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid,
+												  srcpath, rnodelist,
+												  snapshot);
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	/* Every buffer has been released, so the strategy object can go too. */
+	FreeAccessStrategy(bstrategy);
+
+	/* Release relation lock. */
+	UnlockRelationId(&relid, AccessShareLock);
+
+	return rnodelist;
+}
+
+/*
+ * Examine one page of the source database's pg_class relation and append a
+ * CreateDBRelInfo to rnodelist for each visible tuple that describes a
+ * relation we have to copy.  The updated list is returned.
+ */
+static List *
+ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
+							  char *srcpath, List *rnodelist,
+							  Snapshot snapshot)
+{
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+	OffsetNumber lastoff = PageGetMaxOffsetNumber(page);
+	OffsetNumber off;
+	HeapTupleData tuple;
+
+	for (off = FirstOffsetNumber; off <= lastoff; off = OffsetNumberNext(off))
+	{
+		ItemId		lp = PageGetItemId(page, off);
+		CreateDBRelInfo *relinfo;
+
+		/* Unused, dead and redirected line pointers have nothing to offer. */
+		if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp) || ItemIdIsRedirected(lp))
+			continue;
+
+		Assert(ItemIdIsNormal(lp));
+
+		/* Point a HeapTupleData at the tuple as it sits on the page. */
+		ItemPointerSet(&(tuple.t_self), blkno, off);
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+		tuple.t_len = ItemIdGetLength(lp);
+		tuple.t_tableOid = RelationRelationId;
+
+		/* Tuples our snapshot cannot see are skipped. */
+		if (!HeapTupleSatisfiesVisibility(&tuple, snapshot, buf))
+			continue;
+
+		/*
+		 * The tuple-level routine builds the CreateDBRelInfo, or returns
+		 * NULL when this pg_class entry is not something we need to copy.
+		 */
+		relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid, srcpath);
+		if (relinfo != NULL)
+			rnodelist = lappend(rnodelist, relinfo);
+	}
+
+	return rnodelist;
+}
+
+/*
+ * Decide whether a certain pg_class tuple represents something that
+ * needs to be copied from the source database to the destination database,
+ * and if so, construct a CreateDBRelInfo for it.
+ *
+ * Visibility checks are handled by the caller, so our job here is just
+ * to assess the data stored in the tuple.
+ *
+ * Returns a palloc'd CreateDBRelInfo, or NULL if the relation does not need
+ * to be copied.  tbid is used as the tablespace when the tuple's
+ * reltablespace is not set.
+ */
+static CreateDBRelInfo *
+ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid,
+							   char *srcpath)
+{
+	CreateDBRelInfo *relinfo;
+	Form_pg_class classForm;
+	Oid			relfilenode;
+
+	classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+	/*
+	 * Return NULL if this object does not need to be copied.
+	 *
+	 * Shared objects don't need to be copied, because they are shared.
+	 * Objects without storage can't be copied, because there's nothing to
+	 * copy. Temporary relations don't need to be copied either, because
+	 * they are inaccessible outside of the session that created them,
+	 * which must be gone already, and couldn't connect to a different database
+	 * if it still existed. autovacuum will eventually remove the pg_class
+	 * entries as well.
+	 */
+	if (classForm->reltablespace == GLOBALTABLESPACE_OID ||
+		!RELKIND_HAS_STORAGE(classForm->relkind) ||
+		classForm->relpersistence == RELPERSISTENCE_TEMP)
+		return NULL;
+
+	/*
+	 * If relfilenode is valid then directly use it. Otherwise, consult the
+	 * relmap.
+	 */
+	if (OidIsValid(classForm->relfilenode))
+		relfilenode = classForm->relfilenode;
+	else
+		relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
+														  classForm->oid);
+
+	/* We must have a valid relfilenode oid. */
+	if (!OidIsValid(relfilenode))
+		elog(ERROR, "relation with OID %u does not have a valid relfilenode",
+			 classForm->oid);
+
+	/* Prepare a rel info element for the caller to add to its list. */
+	relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo));
+	if (OidIsValid(classForm->reltablespace))
+		relinfo->rnode.spcNode = classForm->reltablespace;
+	else
+		relinfo->rnode.spcNode = tbid;
+
+	relinfo->rnode.dbNode = dbid;
+	relinfo->rnode.relNode = relfilenode;
+	relinfo->reloid = classForm->oid;
+
+	/* Temporary relations were rejected above. */
+	Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
+	relinfo->permanent =
+		(classForm->relpersistence == RELPERSISTENCE_PERMANENT);
+
+	return relinfo;
+}
+
+/*
+ * Create database directory and write out the PG_VERSION file in the database
+ * path. If isRedo is true, it's okay for the database directory to exist
+ * already.
+ *
+ * When isRedo is false, an XLOG_DBASE_CREATE_WAL_LOG record carrying dbid
+ * and tsid is inserted and flushed before anything is created on disk.
+ */
+static void
+CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
+{
+	int			fd;
+	int			nbytes;
+	char		versionfile[MAXPGPATH];
+	char		buf[16];
+
+	/*
+	 * Prepare version data before starting a critical section.
+	 *
+	 * Note that we don't have to copy this from the source database; there's
+	 * only one legal value.  The formatter is bounded by the buffer size and
+	 * its return value is the number of bytes to write, so the two cannot
+	 * get out of step.
+	 */
+	nbytes = snprintf(buf, sizeof(buf), "%s\n", PG_MAJORVERSION);
+	Assert(nbytes > 0 && nbytes < (int) sizeof(buf));
+
+	/* If we are not in WAL replay then write the WAL. */
+	if (!isRedo)
+	{
+		xl_dbase_create_wal_log_rec xlrec;
+		XLogRecPtr	lsn;
+
+		/*
+		 * NOTE(review): every ereport(ERROR) further down runs inside this
+		 * critical section when !isRedo -- confirm that is the intended
+		 * failure behavior for directory/file creation problems.
+		 */
+		START_CRIT_SECTION();
+
+		xlrec.db_id = dbid;
+		xlrec.tablespace_id = tsid;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) (&xlrec),
+						 sizeof(xl_dbase_create_wal_log_rec));
+
+		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
+
+		/* As always, WAL must hit the disk before the data update does. */
+		XLogFlush(lsn);
+	}
+
+	/* Create database directory. */
+	if (MakePGDirectory(dbpath) < 0)
+	{
+		/* Failure other than already exists or not in WAL replay? */
+		if (errno != EEXIST || !isRedo)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create directory \"%s\": %m", dbpath)));
+	}
+
+	/*
+	 * Create PG_VERSION file in the database path.  If the file already
+	 * exists and we are in WAL replay then try again to open it in write
+	 * mode.
+	 */
+	snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION");
+
+	fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
+	if (fd < 0 && errno == EEXIST && isRedo)
+		fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY);
+
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", versionfile)));
+
+	/* Write PG_MAJORVERSION in the PG_VERSION file. */
+	pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE);
+	errno = 0;
+	if ((int) write(fd, buf, nbytes) != nbytes)
+	{
+		/* If write didn't set errno, assume problem is no disk space. */
+		if (errno == 0)
+			errno = ENOSPC;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", versionfile)));
+	}
+	pgstat_report_wait_end();
+
+	/* Close the version file; don't let a failed close go unnoticed. */
+	if (CloseTransientFile(fd) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", versionfile)));
+
+	/* Critical section done. */
+	if (!isRedo)
+		END_CRIT_SECTION();
+}
+
+/*
+ * Create a new database using the FILE_COPY strategy.
+ *
+ * Every tablespace directory of the source database is copied at the
+ * filesystem level, and one WAL record is written per directory copied.
+ * A checkpoint is requested both before and after the copy; that may be
+ * expensive, but WAL volume stays small even when the source database is
+ * large.
+ */
+static void
+CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
+							Oid dst_tsid)
+{
+	Relation	tsrel;
+	TableScanDesc tsscan;
+	HeapTuple	tstup;
+
+	/*
+	 * Force a checkpoint before starting the copy. This will force all dirty
+	 * buffers, including those of unlogged tables, out to disk, to ensure
+	 * source database is up-to-date on disk for the copy.
+	 * FlushDatabaseBuffers() would suffice for that, but we also want to
+	 * process any pending unlink requests. Otherwise, if a checkpoint
+	 * happened while we're copying files, a file might be deleted just when
+	 * we're about to copy it, causing the lstat() call in copydir() to fail
+	 * with ENOENT.
+	 */
+	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
+
+	/*
+	 * Walk pg_tablespace and copy the source database's directory in each
+	 * tablespace where it has one.
+	 */
+	tsrel = table_open(TableSpaceRelationId, AccessShareLock);
+	tsscan = table_beginscan_catalog(tsrel, 0, NULL);
+	while ((tstup = heap_getnext(tsscan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tstup);
+		Oid			srctablespace = spaceform->oid;
+		Oid			dsttablespace;
+		char	   *srcpath;
+		char	   *dstpath;
+		struct stat st;
+		xl_dbase_create_file_copy_rec xlrec;
+
+		/* No need to copy global tablespace */
+		if (srctablespace == GLOBALTABLESPACE_OID)
+			continue;
+
+		srcpath = GetDatabasePath(src_dboid, srctablespace);
+
+		/* Missing, non-directory or empty: assume we can ignore it */
+		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
+			directory_is_empty(srcpath))
+		{
+			pfree(srcpath);
+			continue;
+		}
+
+		/* Only the source's default tablespace is remapped. */
+		dsttablespace = (srctablespace == src_tsid) ? dst_tsid : srctablespace;
+
+		dstpath = GetDatabasePath(dst_dboid, dsttablespace);
+
+		/*
+		 * Copy this subdirectory to the new location
+		 *
+		 * We don't need to copy subdirectories
+		 */
+		copydir(srcpath, dstpath, false);
+
+		/* Record the filesystem change in XLOG */
+		xlrec.db_id = dst_dboid;
+		xlrec.tablespace_id = dsttablespace;
+		xlrec.src_db_id = src_dboid;
+		xlrec.src_tablespace_id = srctablespace;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec,
+						 sizeof(xl_dbase_create_file_copy_rec));
+
+		(void) XLogInsert(RM_DBASE_ID,
+						  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
+	}
+	table_endscan(tsscan);
+	table_close(tsrel, AccessShareLock);
+
+	/*
+	 * We force a checkpoint before committing. This effectively means that
+	 * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be
+	 * replayed (at least not in ordinary crash recovery; we still have to
+	 * make the XLOG entry for the benefit of PITR operations). This avoids
+	 * two nasty scenarios:
+	 *
+	 * #1: When PITR is off, we don't XLOG the contents of newly created
+	 * indexes; therefore the drop-and-recreate-whole-directory behavior of
+	 * DBASE_CREATE replay would lose such indexes.
+	 *
+	 * #2: Since we have to recopy the source database during DBASE_CREATE
+	 * replay, we run the risk of copying changes in it that were committed
+	 * after the original CREATE DATABASE command but before the system crash
+	 * that led to the replay. This is at least unexpected and at worst could
+	 * lead to inconsistencies, eg duplicate table names.
+	 *
+	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
+	 *
+	 * In PITR replay, the first of these isn't an issue, and the second is
+	 * only a risk if the CREATE DATABASE and subsequent template database
+	 * change both occur while a base backup is being taken. There doesn't
+	 * seem to be much we can do about that except document it as a
+	 * limitation.
+	 *
+	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
+	 * strategy that avoids these problems.
+	 */
+	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+}
/*
* CREATE DATABASE
@@ -101,8 +669,6 @@ static int errdetail_busy_db(int notherbackends, int npreparedxacts);
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
- TableScanDesc scan;
- Relation rel;
Oid src_dboid;
Oid src_owner;
int src_encoding = -1;
@@ -137,6 +703,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
DefElem *dallowconnections = NULL;
DefElem *dconnlimit = NULL;
DefElem *dcollversion = NULL;
+ DefElem *dstrategy = NULL;
char *dbname = stmt->dbname;
char *dbowner = NULL;
const char *dbtemplate = NULL;
@@ -152,6 +719,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
char *dbcollversion = NULL;
int notherbackends;
int npreparedxacts;
+ CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG;
createdb_failure_params fparms;
/* Extract options from the statement node tree */
@@ -269,6 +837,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE)),
errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId));
}
+ else if (strcmp(defel->defname, "strategy") == 0)
+ {
+ if (dstrategy)
+ errorConflictingDefElem(defel, pstate);
+ dstrategy = defel;
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -413,6 +987,23 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dbtemplate)));
}
+	/*
+	 * Validate the database creation strategy.  When the option is absent,
+	 * dbstrategy keeps the default it was initialized with.
+	 */
+	if (dstrategy && dstrategy->arg)
+	{
+		char	   *strategy;
+
+		strategy = defGetString(dstrategy);
+		if (strcmp(strategy, "wal_log") == 0)
+			dbstrategy = CREATEDB_WAL_LOG;
+		else if (strcmp(strategy, "file_copy") == 0)
+			dbstrategy = CREATEDB_FILE_COPY;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid create database strategy %s", strategy),
+					 errhint("Valid strategies are \"wal_log\" and \"file_copy\".")));
+	}
+
/* If encoding or locales are defaulted, use source's setting */
if (encoding < 0)
encoding = src_encoding;
@@ -753,17 +1344,18 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);
/*
- * Force a checkpoint before starting the copy. This will force all dirty
- * buffers, including those of unlogged tables, out to disk, to ensure
- * source database is up-to-date on disk for the copy.
- * FlushDatabaseBuffers() would suffice for that, but we also want to
- * process any pending unlink requests. Otherwise, if a checkpoint
- * happened while we're copying files, a file might be deleted just when
- * we're about to copy it, causing the lstat() call in copydir() to fail
- * with ENOENT.
+ * If we're going to be reading data for the to-be-created database
+ * into shared_buffers, take a lock on it. Nobody should know that this
+ * database exists yet, but it's good to maintain the invariant that an
+ * AccessExclusiveLock on the database is sufficient to drop all of its
+ * buffers without worrying about more being read later.
+ *
+ * Note that we need to do this before entering the PG_ENSURE_ERROR_CLEANUP
+ * block below, because createdb_failure_callback expects this lock to
+ * be held already.
*/
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
- | CHECKPOINT_FLUSH_ALL);
+ if (dbstrategy == CREATEDB_WAL_LOG)
+ LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock);
/*
* Once we start copying subdirectories, we need to be able to clean 'em
@@ -774,101 +1366,24 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
*/
fparms.src_dboid = src_dboid;
fparms.dest_dboid = dboid;
+ fparms.strategy = dbstrategy;
+
PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
PointerGetDatum(&fparms));
{
/*
- * Iterate through all tablespaces of the template database, and copy
- * each one to the new database.
+ * If the user has asked to create a database with WAL_LOG strategy
+ * then call CreateDatabaseUsingWalLog, which will copy the database
+ * at the block level and it will WAL log each copied block.
+ * Otherwise, call CreateDatabaseUsingFileCopy that will copy the
+ * database file by file.
*/
- rel = table_open(TableSpaceRelationId, AccessShareLock);
- scan = table_beginscan_catalog(rel, 0, NULL);
- while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
- {
- Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
- Oid srctablespace = spaceform->oid;
- Oid dsttablespace;
- char *srcpath;
- char *dstpath;
- struct stat st;
-
- /* No need to copy global tablespace */
- if (srctablespace == GLOBALTABLESPACE_OID)
- continue;
-
- srcpath = GetDatabasePath(src_dboid, srctablespace);
-
- if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
- directory_is_empty(srcpath))
- {
- /* Assume we can ignore it */
- pfree(srcpath);
- continue;
- }
-
- if (srctablespace == src_deftablespace)
- dsttablespace = dst_deftablespace;
- else
- dsttablespace = srctablespace;
-
- dstpath = GetDatabasePath(dboid, dsttablespace);
-
- /*
- * Copy this subdirectory to the new location
- *
- * We don't need to copy subdirectories
- */
- copydir(srcpath, dstpath, false);
-
- /* Record the filesystem change in XLOG */
- {
- xl_dbase_create_rec xlrec;
-
- xlrec.db_id = dboid;
- xlrec.tablespace_id = dsttablespace;
- xlrec.src_db_id = src_dboid;
- xlrec.src_tablespace_id = srctablespace;
-
- XLogBeginInsert();
- XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
-
- (void) XLogInsert(RM_DBASE_ID,
- XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
- }
- }
- table_endscan(scan);
- table_close(rel, AccessShareLock);
-
- /*
- * We force a checkpoint before committing. This effectively means
- * that committed XLOG_DBASE_CREATE operations will never need to be
- * replayed (at least not in ordinary crash recovery; we still have to
- * make the XLOG entry for the benefit of PITR operations). This
- * avoids two nasty scenarios:
- *
- * #1: When PITR is off, we don't XLOG the contents of newly created
- * indexes; therefore the drop-and-recreate-whole-directory behavior
- * of DBASE_CREATE replay would lose such indexes.
- *
- * #2: Since we have to recopy the source database during DBASE_CREATE
- * replay, we run the risk of copying changes in it that were
- * committed after the original CREATE DATABASE command but before the
- * system crash that led to the replay. This is at least unexpected
- * and at worst could lead to inconsistencies, eg duplicate table
- * names.
- *
- * (Both of these were real bugs in releases 8.0 through 8.0.3.)
- *
- * In PITR replay, the first of these isn't an issue, and the second
- * is only a risk if the CREATE DATABASE and subsequent template
- * database change both occur while a base backup is being taken.
- * There doesn't seem to be much we can do about that except document
- * it as a limitation.
- *
- * Perhaps if we ever implement CREATE DATABASE in a less cheesy way,
- * we can avoid this.
- */
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+ if (dbstrategy == CREATEDB_WAL_LOG)
+ CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace,
+ dst_deftablespace);
+ else
+ CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
+ dst_deftablespace);
/*
* Close pg_database, but keep lock till commit.
@@ -954,6 +1469,25 @@ createdb_failure_callback(int code, Datum arg)
{
createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);
+ /*
+ * If we were copying the database at the block level, then drop pages for
+ * the destination database that are in the shared buffer cache, and tell
+ * the checkpointer to forget any pending fsync and unlink requests for
+ * files in the database. The reasoning behind doing this is the same as
+ * explained in the dropdb function. But unlike dropdb we don't need to call
+ * pgstat_drop_database, because this database has not been created yet,
+ * so there should not be any stats for it.
+ */
+ if (fparms->strategy == CREATEDB_WAL_LOG)
+ {
+ DropDatabaseBuffers(fparms->dest_dboid);
+ ForgetDatabaseSyncRequests(fparms->dest_dboid);
+
+ /* Release lock on the target database. */
+ UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0,
+ AccessShareLock);
+ }
+
/*
* Release lock on source database before doing recursive remove. This is
* not essential but it seems desirable to release the lock as soon as
@@ -1478,7 +2012,7 @@ movedb(const char *dbname, const char *tblspcname)
* Record the filesystem change in XLOG
*/
{
- xl_dbase_create_rec xlrec;
+ xl_dbase_create_file_copy_rec xlrec;
xlrec.db_id = db_id;
xlrec.tablespace_id = dst_tblspcoid;
@@ -1486,10 +2020,11 @@ movedb(const char *dbname, const char *tblspcname)
xlrec.src_tablespace_id = src_tblspcoid;
XLogBeginInsert();
- XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
+ XLogRegisterData((char *) &xlrec,
+ sizeof(xl_dbase_create_file_copy_rec));
(void) XLogInsert(RM_DBASE_ID,
- XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
+ XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
}
/*
@@ -1525,9 +2060,10 @@ movedb(const char *dbname, const char *tblspcname)
/*
* Force another checkpoint here. As in CREATE DATABASE, this is to
- * ensure that we don't have to replay a committed XLOG_DBASE_CREATE
- * operation, which would cause us to lose any unlogged operations
- * done in the new DB tablespace before the next checkpoint.
+ * ensure that we don't have to replay a committed
+ * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose
+ * any unlogged operations done in the new DB tablespace before the
+ * next checkpoint.
*/
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
@@ -2478,9 +3014,10 @@ dbase_redo(XLogReaderState *record)
/* Backup blocks are not used in dbase records */
Assert(!XLogRecHasAnyBlockRefs(record));
- if (info == XLOG_DBASE_CREATE)
+ if (info == XLOG_DBASE_CREATE_FILE_COPY)
{
- xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
+ xl_dbase_create_file_copy_rec *xlrec =
+ (xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
char *src_path;
char *dst_path;
struct stat st;
@@ -2515,6 +3052,18 @@ dbase_redo(XLogReaderState *record)
*/
copydir(src_path, dst_path, false);
}
+ else if (info == XLOG_DBASE_CREATE_WAL_LOG)
+ {
+ xl_dbase_create_wal_log_rec *xlrec =
+ (xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
+ char *dbpath;
+
+ dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+
+ /* Create the database directory with the version file. */
+ CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
+ true);
+ }
else if (info == XLOG_DBASE_DROP)
{
xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 124b9961dc9..51b4a00d50d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -14626,7 +14626,7 @@ index_copy_data(Relation rel, RelFileNode newrnode)
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
- RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
+ RelationCreateStorage(newrnode, rel->rd_rel->relpersistence, true);
/* copy main fork */
RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 11005edc735..d73a40c1bc6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -38,6 +38,7 @@
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
+#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
@@ -486,6 +487,9 @@ static void FindAndDropRelFileNodeBuffers(RelFileNode rnode,
ForkNumber forkNum,
BlockNumber nForkBlock,
BlockNumber firstDelBlock);
+static void RelationCopyStorageUsingBuffer(Relation src, Relation dst,
+ ForkNumber forkNum,
+ bool permanent);
static void AtProcExit_Buffers(int code, Datum arg);
static void CheckForBufferLeaks(void);
static int rnode_comparator(const void *p1, const void *p2);
@@ -772,23 +776,23 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
* ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require
* a relcache entry for the relation.
*
- * NB: At present, this function may only be used on permanent relations, which
- * is OK, because we only use it during XLOG replay. If in the future we
- * want to use it on temporary or unlogged relations, we could pass additional
- * parameters.
+ * Pass permanent = true for a RELPERSISTENCE_PERMANENT relation, and
+ * permanent = false for a RELPERSISTENCE_UNLOGGED relation. This function
+ * cannot be used for temporary relations (and making that work might be
+ * difficult, unless we only want to read temporary relations for our own
+ * BackendId).
*/
Buffer
ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
- BufferAccessStrategy strategy)
+ BufferAccessStrategy strategy, bool permanent)
{
bool hit;
SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
- Assert(InRecovery);
-
- return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
+ return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
+ RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
mode, strategy, &hit);
}
@@ -3676,6 +3680,158 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
pfree(srels);
}
+/* ---------------------------------------------------------------------
+ * RelationCopyStorageUsingBuffer
+ *
+ * Copy fork's data using bufmgr. Same as RelationCopyStorage but instead
+ * of using smgrread and smgrextend this will copy using bufmgr APIs.
+ *
+ * Every source block is appended to the destination, in order. Refer to
+ * CreateAndCopyRelationData() for details of the 'permanent' parameter.
+ * --------------------------------------------------------------------
+ */
+static void
+RelationCopyStorageUsingBuffer(Relation src, Relation dst, ForkNumber forkNum,
+ bool permanent)
+{
+ Buffer srcBuf;
+ Buffer dstBuf;
+ Page srcPage;
+ Page dstPage;
+ bool use_wal;
+ BlockNumber nblocks;
+ BlockNumber blkno;
+ BufferAccessStrategy bstrategy_src;
+ BufferAccessStrategy bstrategy_dst;
+
+ /*
+ * In general, we want to write WAL whenever wal_level > 'minimal', but
+ * we can skip it when copying any fork of an unlogged relation other
+ * than the init fork.
+ */
+ use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
+
+ /* Get number of blocks in the source relation. */
+ nblocks = smgrnblocks(RelationGetSmgr(src), forkNum);
+
+ /* Nothing to copy; just return. */
+ if (nblocks == 0)
+ return;
+
+ /* This is a bulk operation, so use buffer access strategies. */
+ bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
+ bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
+
+ /* Iterate over each block of the source relation file. */
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ /* Read and share-lock the source block so it can't change mid-copy. */
+ srcBuf = ReadBufferWithoutRelcache(src->rd_node, forkNum, blkno,
+ RBM_NORMAL, bstrategy_src,
+ permanent);
+ LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
+ srcPage = BufferGetPage(srcBuf);
+
+ /*
+ * Extend the destination with P_NEW for every source block, new and
+ * empty pages included. Skipping any of them would shift all later
+ * blocks to lower block numbers in the copy.
+ */
+ dstBuf = ReadBufferWithoutRelcache(dst->rd_node, forkNum, P_NEW,
+ RBM_NORMAL, bstrategy_dst,
+ permanent);
+ LockBuffer(dstBuf, BUFFER_LOCK_EXCLUSIVE);
+
+ START_CRIT_SECTION();
+
+ /* Copy page data from the source to the destination. */
+ dstPage = BufferGetPage(dstBuf);
+ memcpy(dstPage, srcPage, BLCKSZ);
+ MarkBufferDirty(dstBuf);
+
+ /* WAL-log the copied page. */
+ if (use_wal)
+ log_newpage_buffer(dstBuf, true);
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(dstBuf);
+ UnlockReleaseBuffer(srcBuf);
+ }
+}
+
+/* ---------------------------------------------------------------------
+ * CreateAndCopyRelationData
+ *
+ * Create storage for the destination relation and copy every fork that
+ * exists in the source relation into it.
+ *
+ * Pass permanent as true for permanent relations and false for
+ * unlogged relations. Temporary relations are currently not
+ * supported by this API.
+ * --------------------------------------------------------------------
+ */
+void
+CreateAndCopyRelationData(RelFileNode src_rnode, RelFileNode dst_rnode,
+ bool permanent)
+{
+ Relation src_rel;
+ Relation dst_rel;
+ char relpersistence;
+
+ /* Translate the boolean into the matching relpersistence code. */
+ relpersistence = permanent ?
+ RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
+
+ /*
+ * We can't use a real relcache entry for a relation in some other
+ * database, but since we're only going to access the fields related
+ * to physical storage, a fake one is good enough. If we didn't do this
+ * and used the smgr layer directly, we would have to worry about
+ * invalidations.
+ */
+ src_rel = CreateFakeRelcacheEntry(src_rnode);
+ dst_rel = CreateFakeRelcacheEntry(dst_rnode);
+
+ /*
+ * Create and copy all forks of the relation. During create database we
+ * have a separate cleanup mechanism which deletes the complete database
+ * directory. Therefore, each individual relation doesn't need to be
+ * registered for cleanup, so register_delete is passed as false.
+ */
+ RelationCreateStorage(dst_rnode, relpersistence, false);
+
+ /* Copy the main fork. */
+ RelationCopyStorageUsingBuffer(src_rel, dst_rel, MAIN_FORKNUM, permanent);
+
+ /* Copy each additional fork that exists in the source relation. */
+ for (ForkNumber forkNum = MAIN_FORKNUM + 1;
+ forkNum <= MAX_FORKNUM; forkNum++)
+ {
+ if (smgrexists(RelationGetSmgr(src_rel), forkNum))
+ {
+ smgrcreate(RelationGetSmgr(dst_rel), forkNum, false);
+
+ /*
+ * WAL-log the fork creation if the relation is permanent, or if
+ * this is the init fork of an unlogged relation.
+ */
+ if (permanent || forkNum == INIT_FORKNUM)
+ log_smgrcreate(&dst_rnode, forkNum);
+
+ /* Copy this fork's data, block by block. */
+ RelationCopyStorageUsingBuffer(src_rel, dst_rel, forkNum,
+ permanent);
+ }
+ }
+
+ /* Release fake relcache entries. */
+ FreeFakeRelcacheEntry(src_rel);
+ FreeFakeRelcacheEntry(dst_rel);
+}
+
/* ---------------------------------------------------------------------
* FlushDatabaseBuffers
*
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 5ae52dd14db..1543da61620 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -175,6 +175,34 @@ ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
return true;
}
+/*
+ * LockRelationId
+ *
+ * Lock a relation identified by a LockRelId (its dbId and relId fields).
+ * Same as LockRelationOid, but takes a LockRelId as input.
+ */
+void
+LockRelationId(LockRelId *relid, LOCKMODE lockmode)
+{
+ LOCKTAG tag;
+ LOCALLOCK *locallock;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_RELATION(tag, relid->dbId, relid->relId);
+
+ res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock);
+
+ /*
+ * Now that we have the lock, check for invalidation messages; see notes
+ * in LockRelationOid. This is skipped if the lock was already clear.
+ */
+ if (res != LOCKACQUIRE_ALREADY_CLEAR)
+ {
+ AcceptInvalidationMessages();
+ MarkLockClear(locallock);
+ }
+}
+
/*
* UnlockRelationId
*
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index ff46a0e3c71..1c8aba49259 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -705,6 +705,9 @@ pgstat_get_wait_io(WaitEventIO w)
case WAIT_EVENT_TWOPHASE_FILE_WRITE:
event_name = "TwophaseFileWrite";
break;
+ case WAIT_EVENT_VERSION_FILE_WRITE:
+ event_name = "VersionFileWrite";
+ break;
case WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ:
event_name = "WALSenderTimelineHistoryRead";
break;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d47fac7bb98..a15ce9edb13 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -3746,7 +3746,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
/* handle these directly, at least for now */
SMgrRelation srel;
- srel = RelationCreateStorage(newrnode, persistence);
+ srel = RelationCreateStorage(newrnode, persistence, true);
smgrclose(srel);
}
else
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 4d0718f0018..dee3387d026 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -251,6 +251,63 @@ RelationMapFilenodeToOid(Oid filenode, bool shared)
return InvalidOid;
}
+/*
+ * RelationMapOidToFilenodeForDatabase
+ *
+ * Like RelationMapOidToFilenode, but looks relationId up in the relmap file
+ * read from dbpath. Returns InvalidOid if that map has no entry for it.
+ */
+Oid
+RelationMapOidToFilenodeForDatabase(char *dbpath, Oid relationId)
+{
+ RelMapFile map;
+ int i;
+
+ /* Read the relmap file found under dbpath. */
+ read_relmap_file(&map, dbpath, false, ERROR);
+
+ /* Scan the relmap entries for the requested relation OID. */
+ for (i = 0; i < map.num_mappings; i++)
+ {
+ if (relationId == map.mappings[i].mapoid)
+ return map.mappings[i].mapfilenode;
+ }
+
+ return InvalidOid;
+}
+
+/*
+ * RelationMapCopy
+ *
+ * Copy the relmap file from the source db path to the destination db path
+ * and WAL-log the operation. This is intended for use in creating a new
+ * relmap file for a database that doesn't have one yet, not for replacing
+ * an existing relmap file.
+ */
+void
+RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
+{
+ RelMapFile map;
+
+ /*
+ * Read the relmap file found under the source database's path.
+ */
+ read_relmap_file(&map, srcdbpath, false, ERROR);
+
+ /*
+ * Write the same data into the destination database's relmap file.
+ *
+ * No sinval is needed because no one can be connected to the destination
+ * database yet. For the same reason, there is no need to acquire
+ * RelationMappingLock.
+ *
+ * There's no point in trying to preserve files here. The new database
+ * isn't usable yet anyway, and won't ever be if we can't install a
+ * relmap file.
+ */
+ write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
+}
+
/*
* RelationMapUpdateMap
*
@@ -1031,6 +1088,13 @@ relmap_redo(XLogReaderState *record)
*
* There shouldn't be anyone else updating relmaps during WAL replay,
* but grab the lock to interlock against load_relmap_file().
+ *
+ * Note that we use the same WAL record for updating the relmap of
+ * an existing database as we do for creating a new database. In
+ * the latter case, taking the relmap lock and sending sinval messages
+ * is unnecessary, but harmless. If we wanted to avoid it, we could
+ * add a flag to the WAL record to indicate which operation is being
+ * performed.
*/
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
write_relmap_file(&newmap, false, true, false,
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 3ed2a2e811e..49966e7b7fd 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -372,7 +372,7 @@ extractPageInfo(XLogReaderState *record)
/* Is this a special record type that I recognize? */
- if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE)
+ if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_FILE_COPY)
{
/*
* New databases can be safely ignored. It won't be present in the
@@ -384,6 +384,13 @@ extractPageInfo(XLogReaderState *record)
* overwriting the database created in the target system.
*/
}
+ else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_WAL_LOG)
+ {
+ /*
+ * A new database can be safely ignored. It won't be present in the
+ * source system, so it will be deleted.
+ */
+ }
else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
{
/*
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index c97d3e87f0c..3f9dfffd57f 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2791,13 +2791,15 @@ psql_completion(const char *text, int start, int end)
/* CREATE DATABASE */
else if (Matches("CREATE", "DATABASE", MatchAny))
COMPLETE_WITH("OWNER", "TEMPLATE", "ENCODING", "TABLESPACE",
- "IS_TEMPLATE",
+ "IS_TEMPLATE", "STRATEGY",
"ALLOW_CONNECTIONS", "CONNECTION LIMIT",
"LC_COLLATE", "LC_CTYPE", "LOCALE", "OID",
"LOCALE_PROVIDER", "ICU_LOCALE");
else if (Matches("CREATE", "DATABASE", MatchAny, "TEMPLATE"))
COMPLETE_WITH_QUERY(Query_for_list_of_template_databases);
+ else if (Matches("CREATE", "DATABASE", MatchAny, "STRATEGY"))
+ COMPLETE_WITH("WAL_LOG", "FILE_COPY");
/* CREATE DOMAIN */
else if (Matches("CREATE", "DOMAIN", MatchAny))
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 6f612abf7c6..0bffa2f3ee4 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -34,6 +34,7 @@ main(int argc, char *argv[])
{"tablespace", required_argument, NULL, 'D'},
{"template", required_argument, NULL, 'T'},
{"encoding", required_argument, NULL, 'E'},
+ {"strategy", required_argument, NULL, 'S'},
{"lc-collate", required_argument, NULL, 1},
{"lc-ctype", required_argument, NULL, 2},
{"locale", required_argument, NULL, 'l'},
@@ -60,6 +61,7 @@ main(int argc, char *argv[])
char *tablespace = NULL;
char *template = NULL;
char *encoding = NULL;
+ char *strategy = NULL;
char *lc_collate = NULL;
char *lc_ctype = NULL;
char *locale = NULL;
@@ -77,7 +79,7 @@ main(int argc, char *argv[])
handle_help_version_opts(argc, argv, "createdb", help);
- while ((c = getopt_long(argc, argv, "h:p:U:wWeO:D:T:E:l:", long_options, &optindex)) != -1)
+ while ((c = getopt_long(argc, argv, "h:p:U:wWeO:D:T:E:l:S:", long_options, &optindex)) != -1)
{
switch (c)
{
@@ -111,6 +113,9 @@ main(int argc, char *argv[])
case 'E':
encoding = pg_strdup(optarg);
break;
+ case 'S':
+ strategy = pg_strdup(optarg);
+ break;
case 1:
lc_collate = pg_strdup(optarg);
break;
@@ -215,6 +220,8 @@ main(int argc, char *argv[])
appendPQExpBufferStr(&sql, " ENCODING ");
appendStringLiteralConn(&sql, encoding, conn);
}
+ if (strategy)
+ appendPQExpBuffer(&sql, " STRATEGY %s", fmtId(strategy));
if (template)
appendPQExpBuffer(&sql, " TEMPLATE %s", fmtId(template));
if (lc_collate)
@@ -294,6 +301,7 @@ help(const char *progname)
printf(_(" --locale-provider={libc|icu}\n"
" locale provider for the database's default collation\n"));
printf(_(" -O, --owner=OWNER database user to own the new database\n"));
+ printf(_(" -S, --strategy=STRATEGY database creation strategy wal_log or file_copy\n"));
printf(_(" -T, --template=TEMPLATE template database to copy\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 35deec9a929..14d3a9563d1 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -104,4 +104,24 @@ $node->command_checks_all(
],
'createdb with incorrect --lc-ctype');
+$node->command_checks_all(
+ [ 'createdb', '--strategy', "foo", 'foobar2' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/^createdb: error: database creation failed: ERROR: invalid create database strategy|^createdb: error: database creation failed: ERROR: invalid create database strategy foo/s
+ ],
+ 'createdb with incorrect --strategy');
+
+# Check database creation strategy
+$node->issues_sql_like(
+ [ 'createdb', '-T', 'foobar2', 'foobar6', '-S', 'wal_log'],
+ qr/statement: CREATE DATABASE foobar6 STRATEGY wal_log TEMPLATE foobar2/,
+ 'create database with WAL_LOG strategy');
+
+$node->issues_sql_like(
+ [ 'createdb', '-T', 'foobar2', 'foobar7', '-S', 'file_copy'],
+ qr/statement: CREATE DATABASE foobar7 STRATEGY file_copy TEMPLATE foobar2/,
+ 'create database with FILE_COPY strategy');
+
done_testing();
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 9ffc7419131..844a023b2ce 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -22,7 +22,9 @@
/* GUC variables */
extern int wal_skip_threshold;
-extern SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence);
+extern SMgrRelation RelationCreateStorage(RelFileNode rnode,
+ char relpersistence,
+ bool register_delete);
extern void RelationDropStorage(Relation rel);
extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit);
extern void RelationPreTruncate(Relation rel);
diff --git a/src/include/commands/dbcommands_xlog.h b/src/include/commands/dbcommands_xlog.h
index 593a8578a41..0ee2452feba 100644
--- a/src/include/commands/dbcommands_xlog.h
+++ b/src/include/commands/dbcommands_xlog.h
@@ -18,17 +18,32 @@
#include "lib/stringinfo.h"
/* record types */
-#define XLOG_DBASE_CREATE 0x00
-#define XLOG_DBASE_DROP 0x10
+#define XLOG_DBASE_CREATE_FILE_COPY 0x00
+#define XLOG_DBASE_CREATE_WAL_LOG 0x10
+#define XLOG_DBASE_DROP 0x20
-typedef struct xl_dbase_create_rec
+/*
+ * Single WAL record for an entire CREATE DATABASE operation. This is used
+ * by the FILE_COPY strategy.
+ */
+typedef struct xl_dbase_create_file_copy_rec
{
- /* Records copying of a single subdirectory incl. contents */
Oid db_id;
Oid tablespace_id;
Oid src_db_id;
Oid src_tablespace_id;
-} xl_dbase_create_rec;
+} xl_dbase_create_file_copy_rec;
+
+/*
+ * WAL record for the beginning of a CREATE DATABASE operation, when the
+ * WAL_LOG strategy is used. Each individual block will be logged separately
+ * afterward.
+ */
+typedef struct xl_dbase_create_wal_log_rec
+{
+ Oid db_id;
+ Oid tablespace_id;
+} xl_dbase_create_wal_log_rec;
typedef struct xl_dbase_drop_rec
{
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index dd01841c300..a6b657f0ba5 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -184,7 +184,8 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
BufferAccessStrategy strategy);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
ForkNumber forkNum, BlockNumber blockNum,
- ReadBufferMode mode, BufferAccessStrategy strategy);
+ ReadBufferMode mode, BufferAccessStrategy strategy,
+ bool permanent);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
@@ -203,6 +204,9 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
extern void FlushOneBuffer(Buffer buffer);
extern void FlushRelationBuffers(Relation rel);
extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
+extern void CreateAndCopyRelationData(RelFileNode src_rnode,
+ RelFileNode dst_rnode,
+ bool permanent);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
int nforks, BlockNumber *firstDelBlock);
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index 49edbcc81be..be1d2c99a95 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -38,6 +38,7 @@ extern void RelationInitLockInfo(Relation relation);
/* Lock a relation */
extern void LockRelationOid(Oid relid, LOCKMODE lockmode);
+extern void LockRelationId(LockRelId *relid, LOCKMODE lockmode);
extern bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode);
extern void UnlockRelationId(LockRelId *relid, LOCKMODE lockmode);
extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode);
diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h
index 9fbb5a7f9b5..f10353e1390 100644
--- a/src/include/utils/relmapper.h
+++ b/src/include/utils/relmapper.h
@@ -38,7 +38,9 @@ typedef struct xl_relmap_update
extern Oid RelationMapOidToFilenode(Oid relationId, bool shared);
extern Oid RelationMapFilenodeToOid(Oid relationId, bool shared);
-
+extern Oid RelationMapOidToFilenodeForDatabase(char *dbpath, Oid relationId);
+extern void RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath,
+ char *dstdbpath);
extern void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
bool immediate);
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 1c39ce031a7..d870c592632 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -218,6 +218,7 @@ typedef enum
WAIT_EVENT_TWOPHASE_FILE_READ,
WAIT_EVENT_TWOPHASE_FILE_SYNC,
WAIT_EVENT_TWOPHASE_FILE_WRITE,
+ WAIT_EVENT_VERSION_FILE_WRITE,
WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 410c9f6b0d1..6b77cc64ef4 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -461,6 +461,8 @@ CoverPos
CreateAmStmt
CreateCastStmt
CreateConversionStmt
+CreateDBRelInfo
+CreateDBStrategy
CreateDomainStmt
CreateEnumStmt
CreateEventTrigStmt
@@ -3705,7 +3707,8 @@ xl_btree_update
xl_btree_vacuum
xl_clog_truncate
xl_commit_ts_truncate
-xl_dbase_create_rec
+xl_dbase_create_file_copy_rec
+xl_dbase_create_wal_log_rec
xl_dbase_drop_rec
xl_end_of_recovery
xl_hash_add_ovfl_page