mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it avoids the need to checkpoint before and after the operation. However, because it logs each changed block individually, it might generate a lot of extra write-ahead logging if the template database is large. Therefore, the older strategy remains available via a new STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy option to createdb.

Somewhat controversially, this patch assembles the list of relations to be copied to the new database by reading the pg_class relation of the template database. Cross-database access like this isn't normally possible, but it can be made to work here because there can't be any connections to the database being copied, nor can it contain any in-doubt transactions. Even so, we have to use lower-level interfaces than normal, since the table scan and relcache interfaces will not work for a database to which we're not connected. The advantage of this approach is that we do not need to rely on the filesystem to determine what ought to be copied, but instead on PostgreSQL's own knowledge of the database structure. This avoids, for example, copying stray files that happen to be located in the source database directory.

Dilip Kumar, with a fairly large number of cosmetic changes by me. Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor, Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian, Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.

Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
This commit is contained in:
@@ -593,7 +593,7 @@ heapam_relation_set_new_filenode(Relation rel,
|
||||
*/
|
||||
*minmulti = GetOldestMultiXactId();
|
||||
|
||||
srel = RelationCreateStorage(*newrnode, persistence);
|
||||
srel = RelationCreateStorage(*newrnode, persistence, true);
|
||||
|
||||
/*
|
||||
* If required, set up an init fork for an unlogged table so that it can
|
||||
@@ -601,7 +601,7 @@ heapam_relation_set_new_filenode(Relation rel,
|
||||
* even if the page has been logged, because the write did not go through
|
||||
* shared_buffers and therefore a concurrent checkpoint may have moved the
|
||||
* redo pointer past our xlog record. Recovery may as well remove it
|
||||
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
|
||||
* while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
|
||||
* record. Therefore, logging is necessary even if wal_level=minimal.
|
||||
*/
|
||||
if (persistence == RELPERSISTENCE_UNLOGGED)
|
||||
@@ -645,7 +645,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
|
||||
* NOTE: any conflict in relfilenode value will be caught in
|
||||
* RelationCreateStorage().
|
||||
*/
|
||||
RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
|
||||
RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence, true);
|
||||
|
||||
/* copy main fork */
|
||||
RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
|
||||
|
||||
@@ -161,7 +161,7 @@ btbuildempty(Relation index)
|
||||
* Write the page and log it. It might seem that an immediate sync would
|
||||
* be sufficient to guarantee that the file exists on disk, but recovery
|
||||
* itself might remove it while replaying, for example, an
|
||||
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
|
||||
* XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
|
||||
* this even when wal_level=minimal.
|
||||
*/
|
||||
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
|
||||
|
||||
@@ -24,14 +24,23 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_DBASE_CREATE)
|
||||
if (info == XLOG_DBASE_CREATE_FILE_COPY)
|
||||
{
|
||||
xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) rec;
|
||||
xl_dbase_create_file_copy_rec *xlrec =
|
||||
(xl_dbase_create_file_copy_rec *) rec;
|
||||
|
||||
appendStringInfo(buf, "copy dir %u/%u to %u/%u",
|
||||
xlrec->src_tablespace_id, xlrec->src_db_id,
|
||||
xlrec->tablespace_id, xlrec->db_id);
|
||||
}
|
||||
else if (info == XLOG_DBASE_CREATE_WAL_LOG)
|
||||
{
|
||||
xl_dbase_create_wal_log_rec *xlrec =
|
||||
(xl_dbase_create_wal_log_rec *) rec;
|
||||
|
||||
appendStringInfo(buf, "create dir %u/%u",
|
||||
xlrec->tablespace_id, xlrec->db_id);
|
||||
}
|
||||
else if (info == XLOG_DBASE_DROP)
|
||||
{
|
||||
xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) rec;
|
||||
@@ -51,8 +60,11 @@ dbase_identify(uint8 info)
|
||||
|
||||
switch (info & ~XLR_INFO_MASK)
|
||||
{
|
||||
case XLOG_DBASE_CREATE:
|
||||
id = "CREATE";
|
||||
case XLOG_DBASE_CREATE_FILE_COPY:
|
||||
id = "CREATE_FILE_COPY";
|
||||
break;
|
||||
case XLOG_DBASE_CREATE_WAL_LOG:
|
||||
id = "CREATE_WAL_LOG";
|
||||
break;
|
||||
case XLOG_DBASE_DROP:
|
||||
id = "DROP";
|
||||
|
||||
@@ -484,7 +484,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
{
|
||||
/* page exists in file */
|
||||
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
|
||||
mode, NULL);
|
||||
mode, NULL, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -509,7 +509,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
ReleaseBuffer(buffer);
|
||||
}
|
||||
buffer = ReadBufferWithoutRelcache(rnode, forknum,
|
||||
P_NEW, mode, NULL);
|
||||
P_NEW, mode, NULL, true);
|
||||
}
|
||||
while (BufferGetBlockNumber(buffer) < blkno);
|
||||
/* Handle the corner case that P_NEW returns non-consecutive pages */
|
||||
@@ -519,7 +519,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
|
||||
mode, NULL);
|
||||
mode, NULL, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user