Revert "Skip WAL for new relfilenodes, under wal_level=minimal."
This reverts commit cb2fd7eac285b1b0a24eeb2b8ed4456b66c5a09f.  Per numerous
buildfarm members, it was incompatible with parallel query, and a test case
assumed LP64.  Back-patch to 9.5 (all supported versions).

Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
This commit is contained in:
parent a653bd8aa7 / commit 348f15e22e
@@ -2171,19 +2171,16 @@ include_dir 'conf.d'
         levels.  This parameter can only be set at server start.
        </para>
        <para>
-        In <literal>minimal</literal> level, no information is logged for
-        permanent relations for the remainder of a transaction that creates or
-        rewrites them.  This can make operations much faster (see
-        <xref linkend="populate-pitr">).  Operations that initiate this
-        optimization include:
+        In <literal>minimal</> level, WAL-logging of some bulk
+        operations can be safely skipped, which can make those
+        operations much faster (see <xref linkend="populate-pitr">).
+        Operations in which this optimization can be applied include:
         <simplelist>
-         <member><command>ALTER ... SET TABLESPACE</command></member>
-         <member><command>CLUSTER</command></member>
-         <member><command>CREATE TABLE</command></member>
-         <member><command>REFRESH MATERIALIZED VIEW</command>
-         (without <option>CONCURRENTLY</option>)</member>
-         <member><command>REINDEX</command></member>
-         <member><command>TRUNCATE</command></member>
+         <member><command>CREATE TABLE AS</></member>
+         <member><command>CREATE INDEX</></member>
+         <member><command>CLUSTER</></member>
+         <member><command>COPY</> into tables that were created or truncated in the same
+         transaction</member>
         </simplelist>
         But minimal WAL does not contain enough information to reconstruct the
         data from a base backup and the WAL logs, so <literal>replica</> or
@@ -2572,26 +2569,6 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>

-     <varlistentry id="guc-wal-skip-threshold" xreflabel="wal_skip_threshold">
-      <term><varname>wal_skip_threshold</varname> (<type>integer</type>)
-      <indexterm>
-       <primary><varname>wal_skip_threshold</varname> configuration parameter</primary>
-      </indexterm>
-      </term>
-      <listitem>
-       <para>
-        When <varname>wal_level</varname> is <literal>minimal</literal> and a
-        transaction commits after creating or rewriting a permanent relation,
-        this setting determines how to persist the new data.  If the data is
-        smaller than this setting, write it to the WAL log; otherwise, use an
-        fsync of affected files.  Depending on the properties of your storage,
-        raising or lowering this value might help if such commits are slowing
-        concurrent transactions.  The default is two megabytes
-        (<literal>2MB</literal>).
-       </para>
-      </listitem>
-     </varlistentry>
-
      <varlistentry id="guc-commit-delay" xreflabel="commit_delay">
       <term><varname>commit_delay</varname> (<type>integer</type>)
       <indexterm>
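The deleted wal_skip_threshold documentation above boils down to a size test at
commit time: if the new relation's data is smaller than the threshold, its pages
are written to WAL; otherwise the files are fsync'd.  A minimal standalone sketch
of that rule follows; the constant, the threshold variable, the helper name, and
the main() harness are illustrative assumptions, not PostgreSQL source.

/*
 * Sketch of the size rule described in the deleted documentation above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BLCKSZ 8192                       /* assume the usual 8kB page size */

static int wal_skip_threshold_kb = 2048;  /* the documented 2MB default */

/* At commit: WAL-log the new relation's pages if it is small, else fsync it. */
static bool
persist_via_wal(uint64_t total_blocks)
{
    uint64_t size_kb = total_blocks * BLCKSZ / 1024;

    return size_kb < (uint64_t) wal_skip_threshold_kb;
}

int
main(void)
{
    printf("100 blocks  -> %s\n", persist_via_wal(100) ? "write WAL" : "fsync files");
    printf("1000 blocks -> %s\n", persist_via_wal(1000) ? "write WAL" : "fsync files");
    return 0;
}

With 8kB pages and the 2MB default, an 800kB relation would be WAL-logged while an
8MB one would be fsync'd, which is exactly the trade-off the deleted paragraph
describes.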
@@ -1394,13 +1394,42 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
    </para>

    <para>
-    Aside from avoiding the time for the archiver or WAL sender to process the
-    WAL data, doing this will actually make certain commands faster, because
-    they do not to write WAL at all if <varname>wal_level</varname>
-    is <literal>minimal</literal> and the current subtransaction (or top-level
-    transaction) created or truncated the table or index they change.  (They
-    can guarantee crash safety more cheaply by doing
-    an <function>fsync</function> at the end than by writing WAL.)
+    Aside from avoiding the time for the archiver or WAL sender to
+    process the WAL data,
+    doing this will actually make certain commands faster, because they
+    are designed not to write WAL at all if <varname>wal_level</varname>
+    is <literal>minimal</>.  (They can guarantee crash safety more cheaply
+    by doing an <function>fsync</> at the end than by writing WAL.)
+    This applies to the following commands:
+    <itemizedlist>
+     <listitem>
+      <para>
+       <command>CREATE TABLE AS SELECT</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>CREATE INDEX</command> (and variants such as
+       <command>ALTER TABLE ADD PRIMARY KEY</command>)
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>ALTER TABLE SET TABLESPACE</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>CLUSTER</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>COPY FROM</command>, when the target table has been
+       created or truncated earlier in the same transaction
+      </para>
+     </listitem>
+    </itemizedlist>
    </para>
   </sect2>

@ -190,7 +190,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
|||||||
PageSetLSN(page, recptr);
|
PageSetLSN(page, recptr);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
PageSetLSN(page, gistGetFakeLSN(index));
|
PageSetLSN(page, gistGetFakeLSN(heap));
|
||||||
|
|
||||||
UnlockReleaseBuffer(buffer);
|
UnlockReleaseBuffer(buffer);
|
||||||
|
|
||||||
|
@ -937,44 +937,23 @@ gistproperty(Oid index_oid, int attno,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
|
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
|
||||||
* splits anyway. This function provides a fake sequence of LSNs for that
|
* to detect concurrent page splits anyway. This function provides a fake
|
||||||
* purpose.
|
* sequence of LSNs for that purpose.
|
||||||
*/
|
*/
|
||||||
XLogRecPtr
|
XLogRecPtr
|
||||||
gistGetFakeLSN(Relation rel)
|
gistGetFakeLSN(Relation rel)
|
||||||
{
|
{
|
||||||
|
static XLogRecPtr counter = 1;
|
||||||
|
|
||||||
if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
|
if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Temporary relations are only accessible in our session, so a simple
|
* Temporary relations are only accessible in our session, so a simple
|
||||||
* backend-local counter will do.
|
* backend-local counter will do.
|
||||||
*/
|
*/
|
||||||
static XLogRecPtr counter = 1;
|
|
||||||
|
|
||||||
return counter++;
|
return counter++;
|
||||||
}
|
}
|
||||||
else if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* WAL-logging on this relation will start after commit, so its LSNs
|
|
||||||
* must be distinct numbers smaller than the LSN at the next commit.
|
|
||||||
* Emit a dummy WAL record if insert-LSN hasn't advanced after the
|
|
||||||
* last call.
|
|
||||||
*/
|
|
||||||
static XLogRecPtr lastlsn = InvalidXLogRecPtr;
|
|
||||||
XLogRecPtr currlsn = GetXLogInsertRecPtr();
|
|
||||||
|
|
||||||
/* Shouldn't be called for WAL-logging relations */
|
|
||||||
Assert(!RelationNeedsWAL(rel));
|
|
||||||
|
|
||||||
/* No need for an actual record if we already have a distinct LSN */
|
|
||||||
if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
|
|
||||||
currlsn = gistXLogAssignLSN();
|
|
||||||
|
|
||||||
lastlsn = currlsn;
|
|
||||||
return currlsn;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
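The comment restored above explains why gistGetFakeLSN() exists at all: temporary
and unlogged GiST indexes are never WAL-logged, yet page-split detection still
needs LSNs that move forward, and for a relation visible to only one session a
backend-local counter is enough.  Below is a standalone sketch of that counter
idea; the type and function names are illustrative, not the backend function.

/*
 * Sketch of a backend-local "fake LSN" counter, per the restored comment above.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t FakeLSN;

static FakeLSN
get_fake_lsn(void)
{
    static FakeLSN counter = 1;     /* monotonic within one process */

    return counter++;
}

int
main(void)
{
    /* Each call returns a strictly larger value, which is all that
     * concurrent-split detection needs for a session-private index. */
    printf("%llu %llu %llu\n",
           (unsigned long long) get_fake_lsn(),
           (unsigned long long) get_fake_lsn(),
           (unsigned long long) get_fake_lsn());
    return 0;
}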
@ -480,9 +480,6 @@ gist_redo(XLogReaderState *record)
|
|||||||
case XLOG_GIST_CREATE_INDEX:
|
case XLOG_GIST_CREATE_INDEX:
|
||||||
gistRedoCreateIndex(record);
|
gistRedoCreateIndex(record);
|
||||||
break;
|
break;
|
||||||
case XLOG_GIST_ASSIGN_LSN:
|
|
||||||
/* nop. See gistGetFakeLSN(). */
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
elog(PANIC, "gist_redo: unknown op code %u", info);
|
elog(PANIC, "gist_redo: unknown op code %u", info);
|
||||||
}
|
}
|
||||||
@ -559,23 +556,6 @@ gistXLogSplit(bool page_is_leaf,
|
|||||||
return recptr;
|
return recptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Write an empty XLOG record to assign a distinct LSN.
|
|
||||||
*/
|
|
||||||
XLogRecPtr
|
|
||||||
gistXLogAssignLSN(void)
|
|
||||||
{
|
|
||||||
int dummy = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Records other than SWITCH_WAL must have content. We use an integer 0 to
|
|
||||||
* follow the restriction.
|
|
||||||
*/
|
|
||||||
XLogBeginInsert();
|
|
||||||
XLogRegisterData((char *) &dummy, sizeof(dummy));
|
|
||||||
return XLogInsert(RM_GIST_ID, XLOG_GIST_ASSIGN_LSN);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Write XLOG record describing a page update. The update can include any
|
* Write XLOG record describing a page update. The update can include any
|
||||||
* number of deletions and/or insertions of tuples on a single index page.
|
* number of deletions and/or insertions of tuples on a single index page.
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
* heap_multi_insert - insert multiple tuples into a relation
|
* heap_multi_insert - insert multiple tuples into a relation
|
||||||
* heap_delete - delete a tuple from a relation
|
* heap_delete - delete a tuple from a relation
|
||||||
* heap_update - replace a tuple in a relation with another tuple
|
* heap_update - replace a tuple in a relation with another tuple
|
||||||
|
* heap_sync - sync heap, for when no WAL has been written
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* This file contains the heap_ routines which implement
|
* This file contains the heap_ routines which implement
|
||||||
@ -2325,6 +2326,12 @@ FreeBulkInsertState(BulkInsertState bistate)
|
|||||||
* The new tuple is stamped with current transaction ID and the specified
|
* The new tuple is stamped with current transaction ID and the specified
|
||||||
* command ID.
|
* command ID.
|
||||||
*
|
*
|
||||||
|
* If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
|
||||||
|
* logged in WAL, even for a non-temp relation. Safe usage of this behavior
|
||||||
|
* requires that we arrange that all new tuples go into new pages not
|
||||||
|
* containing any tuples from other transactions, and that the relation gets
|
||||||
|
* fsync'd before commit. (See also heap_sync() comments)
|
||||||
|
*
|
||||||
* The HEAP_INSERT_SKIP_FSM option is passed directly to
|
* The HEAP_INSERT_SKIP_FSM option is passed directly to
|
||||||
* RelationGetBufferForTuple, which see for more info.
|
* RelationGetBufferForTuple, which see for more info.
|
||||||
*
|
*
|
||||||
@ -2433,7 +2440,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
|||||||
MarkBufferDirty(buffer);
|
MarkBufferDirty(buffer);
|
||||||
|
|
||||||
/* XLOG stuff */
|
/* XLOG stuff */
|
||||||
if (RelationNeedsWAL(relation))
|
if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
|
||||||
{
|
{
|
||||||
xl_heap_insert xlrec;
|
xl_heap_insert xlrec;
|
||||||
xl_heap_header xlhdr;
|
xl_heap_header xlhdr;
|
||||||
@ -2641,7 +2648,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
/* currently not needed (thus unsupported) for heap_multi_insert() */
|
/* currently not needed (thus unsupported) for heap_multi_insert() */
|
||||||
AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
|
AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
|
||||||
|
|
||||||
needwal = RelationNeedsWAL(relation);
|
needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
|
||||||
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
||||||
HEAP_DEFAULT_FILLFACTOR);
|
HEAP_DEFAULT_FILLFACTOR);
|
||||||
|
|
||||||
@ -9279,13 +9286,18 @@ heap2_redo(XLogReaderState *record)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* heap_sync - for binary compatibility
|
* heap_sync - sync a heap, for use when no WAL has been written
|
||||||
*
|
*
|
||||||
* A newer PostgreSQL version removes this function. It exists here just in
|
* This forces the heap contents (including TOAST heap if any) down to disk.
|
||||||
* case an extension calls it. See "Skipping WAL for New RelFileNode" in
|
* If we skipped using WAL, and WAL is otherwise needed, we must force the
|
||||||
* src/backend/access/transam/README for the system that superseded it,
|
* relation down to disk before it's safe to commit the transaction. This
|
||||||
* allowing removal of most calls. Cases like copy_relation_data() should
|
* requires writing out any dirty buffers and then doing a forced fsync.
|
||||||
* call smgrimmedsync() directly.
|
*
|
||||||
|
* Indexes are not touched. (Currently, index operations associated with
|
||||||
|
* the commands that use this are WAL-logged and so do not need fsync.
|
||||||
|
* That behavior might change someday, but in any case it's likely that
|
||||||
|
* any fsync decisions required would be per-index and hence not appropriate
|
||||||
|
* to be done here.)
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
heap_sync(Relation rel)
|
heap_sync(Relation rel)
|
||||||
|
@ -143,6 +143,7 @@ typedef struct RewriteStateData
|
|||||||
Page rs_buffer; /* page currently being built */
|
Page rs_buffer; /* page currently being built */
|
||||||
BlockNumber rs_blockno; /* block where page will go */
|
BlockNumber rs_blockno; /* block where page will go */
|
||||||
bool rs_buffer_valid; /* T if any tuples in buffer */
|
bool rs_buffer_valid; /* T if any tuples in buffer */
|
||||||
|
bool rs_use_wal; /* must we WAL-log inserts? */
|
||||||
bool rs_logical_rewrite; /* do we need to do logical rewriting */
|
bool rs_logical_rewrite; /* do we need to do logical rewriting */
|
||||||
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
|
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
|
||||||
* determine tuple visibility */
|
* determine tuple visibility */
|
||||||
@ -236,13 +237,15 @@ static void logical_end_heap_rewrite(RewriteState state);
|
|||||||
* oldest_xmin xid used by the caller to determine which tuples are dead
|
* oldest_xmin xid used by the caller to determine which tuples are dead
|
||||||
* freeze_xid xid before which tuples will be frozen
|
* freeze_xid xid before which tuples will be frozen
|
||||||
* min_multi multixact before which multis will be removed
|
* min_multi multixact before which multis will be removed
|
||||||
|
* use_wal should the inserts to the new heap be WAL-logged?
|
||||||
*
|
*
|
||||||
* Returns an opaque RewriteState, allocated in current memory context,
|
* Returns an opaque RewriteState, allocated in current memory context,
|
||||||
* to be used in subsequent calls to the other functions.
|
* to be used in subsequent calls to the other functions.
|
||||||
*/
|
*/
|
||||||
RewriteState
|
RewriteState
|
||||||
begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
|
begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
|
||||||
TransactionId freeze_xid, MultiXactId cutoff_multi)
|
TransactionId freeze_xid, MultiXactId cutoff_multi,
|
||||||
|
bool use_wal)
|
||||||
{
|
{
|
||||||
RewriteState state;
|
RewriteState state;
|
||||||
MemoryContext rw_cxt;
|
MemoryContext rw_cxt;
|
||||||
@ -267,6 +270,7 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
|
|||||||
/* new_heap needn't be empty, just locked */
|
/* new_heap needn't be empty, just locked */
|
||||||
state->rs_blockno = RelationGetNumberOfBlocks(new_heap);
|
state->rs_blockno = RelationGetNumberOfBlocks(new_heap);
|
||||||
state->rs_buffer_valid = false;
|
state->rs_buffer_valid = false;
|
||||||
|
state->rs_use_wal = use_wal;
|
||||||
state->rs_oldest_xmin = oldest_xmin;
|
state->rs_oldest_xmin = oldest_xmin;
|
||||||
state->rs_freeze_xid = freeze_xid;
|
state->rs_freeze_xid = freeze_xid;
|
||||||
state->rs_cutoff_multi = cutoff_multi;
|
state->rs_cutoff_multi = cutoff_multi;
|
||||||
@ -325,7 +329,7 @@ end_heap_rewrite(RewriteState state)
|
|||||||
/* Write the last page, if any */
|
/* Write the last page, if any */
|
||||||
if (state->rs_buffer_valid)
|
if (state->rs_buffer_valid)
|
||||||
{
|
{
|
||||||
if (RelationNeedsWAL(state->rs_new_rel))
|
if (state->rs_use_wal)
|
||||||
log_newpage(&state->rs_new_rel->rd_node,
|
log_newpage(&state->rs_new_rel->rd_node,
|
||||||
MAIN_FORKNUM,
|
MAIN_FORKNUM,
|
||||||
state->rs_blockno,
|
state->rs_blockno,
|
||||||
@ -340,14 +344,18 @@ end_heap_rewrite(RewriteState state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When we WAL-logged rel pages, we must nonetheless fsync them. The
|
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
|
||||||
|
* to ensure that the toast table gets fsync'd too.
|
||||||
|
*
|
||||||
|
* It's obvious that we must do this when not WAL-logging. It's less
|
||||||
|
* obvious that we have to do it even if we did WAL-log the pages. The
|
||||||
* reason is the same as in tablecmds.c's copy_relation_data(): we're
|
* reason is the same as in tablecmds.c's copy_relation_data(): we're
|
||||||
* writing data that's not in shared buffers, and so a CHECKPOINT
|
* writing data that's not in shared buffers, and so a CHECKPOINT
|
||||||
* occurring during the rewriteheap operation won't have fsync'd data we
|
* occurring during the rewriteheap operation won't have fsync'd data we
|
||||||
* wrote before the checkpoint.
|
* wrote before the checkpoint.
|
||||||
*/
|
*/
|
||||||
if (RelationNeedsWAL(state->rs_new_rel))
|
if (RelationNeedsWAL(state->rs_new_rel))
|
||||||
smgrimmedsync(state->rs_new_rel->rd_smgr, MAIN_FORKNUM);
|
heap_sync(state->rs_new_rel);
|
||||||
|
|
||||||
logical_end_heap_rewrite(state);
|
logical_end_heap_rewrite(state);
|
||||||
|
|
||||||
@ -644,6 +652,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
|
|||||||
{
|
{
|
||||||
int options = HEAP_INSERT_SKIP_FSM;
|
int options = HEAP_INSERT_SKIP_FSM;
|
||||||
|
|
||||||
|
if (!state->rs_use_wal)
|
||||||
|
options |= HEAP_INSERT_SKIP_WAL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* While rewriting the heap for VACUUM FULL / CLUSTER, make sure data
|
* While rewriting the heap for VACUUM FULL / CLUSTER, make sure data
|
||||||
* for the TOAST table are not logically decoded. The main heap is
|
* for the TOAST table are not logically decoded. The main heap is
|
||||||
@ -682,7 +693,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
|
|||||||
/* Doesn't fit, so write out the existing page */
|
/* Doesn't fit, so write out the existing page */
|
||||||
|
|
||||||
/* XLOG stuff */
|
/* XLOG stuff */
|
||||||
if (RelationNeedsWAL(state->rs_new_rel))
|
if (state->rs_use_wal)
|
||||||
log_newpage(&state->rs_new_rel->rd_node,
|
log_newpage(&state->rs_new_rel->rd_node,
|
||||||
MAIN_FORKNUM,
|
MAIN_FORKNUM,
|
||||||
state->rs_blockno,
|
state->rs_blockno,
|
||||||
|
@ -40,6 +40,18 @@
|
|||||||
* them. They will need to be re-read into shared buffers on first use after
|
* them. They will need to be re-read into shared buffers on first use after
|
||||||
* the build finishes.
|
* the build finishes.
|
||||||
*
|
*
|
||||||
|
* Since the index will never be used unless it is completely built,
|
||||||
|
* from a crash-recovery point of view there is no need to WAL-log the
|
||||||
|
* steps of the build. After completing the index build, we can just sync
|
||||||
|
* the whole file to disk using smgrimmedsync() before exiting this module.
|
||||||
|
* This can be seen to be sufficient for crash recovery by considering that
|
||||||
|
* it's effectively equivalent to what would happen if a CHECKPOINT occurred
|
||||||
|
* just after the index build. However, it is clearly not sufficient if the
|
||||||
|
* DBA is using the WAL log for PITR or replication purposes, since another
|
||||||
|
* machine would not be able to reconstruct the index from WAL. Therefore,
|
||||||
|
* we log the completed index pages to WAL if and only if WAL archiving is
|
||||||
|
* active.
|
||||||
|
*
|
||||||
* This code isn't concerned about the FSM at all. The caller is responsible
|
* This code isn't concerned about the FSM at all. The caller is responsible
|
||||||
* for initializing that.
|
* for initializing that.
|
||||||
*
|
*
|
||||||
@ -204,7 +216,12 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
|
|||||||
|
|
||||||
wstate.heap = btspool->heap;
|
wstate.heap = btspool->heap;
|
||||||
wstate.index = btspool->index;
|
wstate.index = btspool->index;
|
||||||
wstate.btws_use_wal = RelationNeedsWAL(wstate.index);
|
|
||||||
|
/*
|
||||||
|
* We need to log index creation in WAL iff WAL archiving/streaming is
|
||||||
|
* enabled UNLESS the index isn't WAL-logged anyway.
|
||||||
|
*/
|
||||||
|
wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
|
||||||
|
|
||||||
/* reserve the metapage */
|
/* reserve the metapage */
|
||||||
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
|
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
|
||||||
@ -794,15 +811,21 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
|
|||||||
_bt_uppershutdown(wstate, state);
|
_bt_uppershutdown(wstate, state);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When we WAL-logged index pages, we must nonetheless fsync index files.
|
* If the index is WAL-logged, we must fsync it down to disk before it's
|
||||||
* Since we're building outside shared buffers, a CHECKPOINT occurring
|
* safe to commit the transaction. (For a non-WAL-logged index we don't
|
||||||
* during the build has no way to flush the previously written data to
|
* care since the index will be uninteresting after a crash anyway.)
|
||||||
* disk (indeed it won't know the index even exists). A crash later on
|
*
|
||||||
* would replay WAL from the checkpoint, therefore it wouldn't replay our
|
* It's obvious that we must do this when not WAL-logging the build. It's
|
||||||
* earlier WAL entries. If we do not fsync those pages here, they might
|
* less obvious that we have to do it even if we did WAL-log the index
|
||||||
* still not be on disk when the crash occurs.
|
* pages. The reason is that since we're building outside shared buffers,
|
||||||
|
* a CHECKPOINT occurring during the build has no way to flush the
|
||||||
|
* previously written data to disk (indeed it won't know the index even
|
||||||
|
* exists). A crash later on would replay WAL from the checkpoint,
|
||||||
|
* therefore it wouldn't replay our earlier WAL entries. If we do not
|
||||||
|
* fsync those pages here, they might still not be on disk when the crash
|
||||||
|
* occurs.
|
||||||
*/
|
*/
|
||||||
if (wstate->btws_use_wal)
|
if (RelationNeedsWAL(wstate->index))
|
||||||
{
|
{
|
||||||
RelationOpenSmgr(wstate->index);
|
RelationOpenSmgr(wstate->index);
|
||||||
smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
|
smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
|
||||||
|
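The B-tree build comment restored above rests on one durability rule: pages
written straight through the storage manager, bypassing shared buffers, are
invisible to any CHECKPOINT that runs during the build, so the file must be
fsync'd before the transaction commits.  A plain POSIX sketch of the same
write-then-fsync discipline follows; the path and the zero-filled payload are
made up for illustration and are not PostgreSQL code.

/*
 * Write a page directly to a file, then fsync before declaring it durable.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
    const char *path = "/tmp/bulk_build_demo";
    char        page[8192];
    int         fd;

    memset(page, 0, sizeof(page));

    fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0600);
    if (fd < 0)
    {
        perror("open");
        return EXIT_FAILURE;
    }
    if (write(fd, page, sizeof(page)) != (ssize_t) sizeof(page))
    {
        perror("write");
        return EXIT_FAILURE;
    }

    /* Without this, a crash shortly after "commit" could lose the page:
     * nothing was WAL-logged for it and no checkpoint knows the file exists. */
    if (fsync(fd) != 0)
    {
        perror("fsync");
        return EXIT_FAILURE;
    }
    close(fd);
    puts("page durable before commit");
    return EXIT_SUCCESS;
}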
@ -46,9 +46,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
|
|||||||
break;
|
break;
|
||||||
case XLOG_GIST_CREATE_INDEX:
|
case XLOG_GIST_CREATE_INDEX:
|
||||||
break;
|
break;
|
||||||
case XLOG_GIST_ASSIGN_LSN:
|
|
||||||
/* No details to write out */
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,9 +65,6 @@ gist_identify(uint8 info)
|
|||||||
case XLOG_GIST_CREATE_INDEX:
|
case XLOG_GIST_CREATE_INDEX:
|
||||||
id = "CREATE_INDEX";
|
id = "CREATE_INDEX";
|
||||||
break;
|
break;
|
||||||
case XLOG_GIST_ASSIGN_LSN:
|
|
||||||
id = "ASSIGN_LSN";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return id;
|
return id;
|
||||||
|
@@ -717,38 +717,6 @@ then restart recovery.  This is part of the reason for not writing a WAL
 entry until we've successfully done the original action.


-Skipping WAL for New RelFileNode
---------------------------------
-
-Under wal_level=minimal, if a change modifies a relfilenode that ROLLBACK
-would unlink, in-tree access methods write no WAL for that change.  Code that
-writes WAL without calling RelationNeedsWAL() must check for this case.  This
-skipping is mandatory.  If a WAL-writing change preceded a WAL-skipping change
-for the same block, REDO could overwrite the WAL-skipping change.  If a
-WAL-writing change followed a WAL-skipping change for the same block, a
-related problem would arise.  When a WAL record contains no full-page image,
-REDO expects the page to match its contents from just before record insertion.
-A WAL-skipping change may not reach disk at all, violating REDO's expectation
-under full_page_writes=off.  For any access method, CommitTransaction() writes
-and fsyncs affected blocks before recording the commit.
-
-Prefer to do the same in future access methods.  However, two other approaches
-can work.  First, an access method can irreversibly transition a given fork
-from WAL-skipping to WAL-writing by calling FlushRelationBuffers() and
-smgrimmedsync().  Second, an access method can opt to write WAL
-unconditionally for permanent relations.  Under these approaches, the access
-method callbacks must not call functions that react to RelationNeedsWAL().
-
-This applies only to WAL records whose replay would modify bytes stored in the
-new relfilenode.  It does not apply to other records about the relfilenode,
-such as XLOG_SMGR_CREATE.  Because it operates at the level of individual
-relfilenodes, RelationNeedsWAL() can differ for tightly-coupled relations.
-Consider "CREATE TABLE t (); BEGIN; ALTER TABLE t ADD c text; ..." in which
-ALTER TABLE adds a TOAST relation.  The TOAST relation will skip WAL, while
-the table owning it will not.  ALTER TABLE SET TABLESPACE will cause a table
-to skip WAL, but that won't affect its indexes.
-
-
 Asynchronous Commit
 -------------------

@@ -852,12 +820,13 @@ Changes to a temp table are not WAL-logged, hence could reach disk in
 advance of T1's commit, but we don't care since temp table contents don't
 survive crashes anyway.

-Database writes that skip WAL for new relfilenodes are also safe.  In these
-cases it's entirely possible for the data to reach disk before T1's commit,
-because T1 will fsync it down to disk without any sort of interlock.  However,
-all these paths are designed to write data that no other transaction can see
-until after T1 commits.  The situation is thus not different from ordinary
-WAL-logged updates.
+Database writes made via any of the paths we have introduced to avoid WAL
+overhead for bulk updates are also safe.  In these cases it's entirely
+possible for the data to reach disk before T1's commit, because T1 will
+fsync it down to disk without any sort of interlock, as soon as it finishes
+the bulk update.  However, all these paths are designed to write data that
+no other transaction can see until after T1 commits.  The situation is thus
+not different from ordinary WAL-logged updates.

 Transaction Emulation during Recovery
 -------------------------------------
|
@ -2032,13 +2032,6 @@ CommitTransaction(void)
|
|||||||
*/
|
*/
|
||||||
PreCommit_on_commit_actions();
|
PreCommit_on_commit_actions();
|
||||||
|
|
||||||
/*
|
|
||||||
* Synchronize files that are created and not WAL-logged during this
|
|
||||||
* transaction. This must happen before AtEOXact_RelationMap(), so that we
|
|
||||||
* don't see committed-but-broken files after a crash.
|
|
||||||
*/
|
|
||||||
smgrDoPendingSyncs(true);
|
|
||||||
|
|
||||||
/* close large objects before lower-level cleanup */
|
/* close large objects before lower-level cleanup */
|
||||||
AtEOXact_LargeObject(true);
|
AtEOXact_LargeObject(true);
|
||||||
|
|
||||||
@ -2267,13 +2260,6 @@ PrepareTransaction(void)
|
|||||||
*/
|
*/
|
||||||
PreCommit_on_commit_actions();
|
PreCommit_on_commit_actions();
|
||||||
|
|
||||||
/*
|
|
||||||
* Synchronize files that are created and not WAL-logged during this
|
|
||||||
* transaction. This must happen before EndPrepare(), so that we don't see
|
|
||||||
* committed-but-broken files after a crash and COMMIT PREPARED.
|
|
||||||
*/
|
|
||||||
smgrDoPendingSyncs(true);
|
|
||||||
|
|
||||||
/* close large objects before lower-level cleanup */
|
/* close large objects before lower-level cleanup */
|
||||||
AtEOXact_LargeObject(true);
|
AtEOXact_LargeObject(true);
|
||||||
|
|
||||||
@ -2574,7 +2560,6 @@ AbortTransaction(void)
|
|||||||
*/
|
*/
|
||||||
AfterTriggerEndXact(false); /* 'false' means it's abort */
|
AfterTriggerEndXact(false); /* 'false' means it's abort */
|
||||||
AtAbort_Portals();
|
AtAbort_Portals();
|
||||||
smgrDoPendingSyncs(false);
|
|
||||||
AtEOXact_LargeObject(false);
|
AtEOXact_LargeObject(false);
|
||||||
AtAbort_Notify();
|
AtAbort_Notify();
|
||||||
AtEOXact_RelationMap(false);
|
AtEOXact_RelationMap(false);
|
||||||
|
@ -542,8 +542,6 @@ typedef FakeRelCacheEntryData *FakeRelCacheEntry;
|
|||||||
* fields related to physical storage, like rd_rel, are initialized, so the
|
* fields related to physical storage, like rd_rel, are initialized, so the
|
||||||
* fake entry is only usable in low-level operations like ReadBuffer().
|
* fake entry is only usable in low-level operations like ReadBuffer().
|
||||||
*
|
*
|
||||||
* This is also used for syncing WAL-skipped files.
|
|
||||||
*
|
|
||||||
* Caller must free the returned entry with FreeFakeRelcacheEntry().
|
* Caller must free the returned entry with FreeFakeRelcacheEntry().
|
||||||
*/
|
*/
|
||||||
Relation
|
Relation
|
||||||
@ -552,20 +550,18 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
|
|||||||
FakeRelCacheEntry fakeentry;
|
FakeRelCacheEntry fakeentry;
|
||||||
Relation rel;
|
Relation rel;
|
||||||
|
|
||||||
|
Assert(InRecovery);
|
||||||
|
|
||||||
/* Allocate the Relation struct and all related space in one block. */
|
/* Allocate the Relation struct and all related space in one block. */
|
||||||
fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
|
fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
|
||||||
rel = (Relation) fakeentry;
|
rel = (Relation) fakeentry;
|
||||||
|
|
||||||
rel->rd_rel = &fakeentry->pgc;
|
rel->rd_rel = &fakeentry->pgc;
|
||||||
rel->rd_node = rnode;
|
rel->rd_node = rnode;
|
||||||
|
/* We will never be working with temp rels during recovery */
|
||||||
/*
|
|
||||||
* We will never be working with temp rels during recovery or while
|
|
||||||
* syncing WAL-skipped files.
|
|
||||||
*/
|
|
||||||
rel->rd_backend = InvalidBackendId;
|
rel->rd_backend = InvalidBackendId;
|
||||||
|
|
||||||
/* It must be a permanent table here */
|
/* It must be a permanent table if we're in recovery. */
|
||||||
rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
|
rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
|
||||||
|
|
||||||
/* We don't know the name of the relation; use relfilenode instead */
|
/* We don't know the name of the relation; use relfilenode instead */
|
||||||
@ -574,9 +570,9 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
|
|||||||
/*
|
/*
|
||||||
* We set up the lockRelId in case anything tries to lock the dummy
|
* We set up the lockRelId in case anything tries to lock the dummy
|
||||||
* relation. Note that this is fairly bogus since relNode may be
|
* relation. Note that this is fairly bogus since relNode may be
|
||||||
* different from the relation's OID. It shouldn't really matter though.
|
* different from the relation's OID. It shouldn't really matter though,
|
||||||
* In recovery, we are running by ourselves and can't have any lock
|
* since we are presumably running by ourselves and can't have any lock
|
||||||
* conflicts. While syncing, we already hold AccessExclusiveLock.
|
* conflicts ...
|
||||||
*/
|
*/
|
||||||
rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
|
rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
|
||||||
rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
|
rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
|
||||||
|
@ -299,8 +299,6 @@ Boot_DeclareIndexStmt:
|
|||||||
stmt->idxcomment = NULL;
|
stmt->idxcomment = NULL;
|
||||||
stmt->indexOid = InvalidOid;
|
stmt->indexOid = InvalidOid;
|
||||||
stmt->oldNode = InvalidOid;
|
stmt->oldNode = InvalidOid;
|
||||||
stmt->oldCreateSubid = InvalidSubTransactionId;
|
|
||||||
stmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
|
|
||||||
stmt->unique = false;
|
stmt->unique = false;
|
||||||
stmt->primary = false;
|
stmt->primary = false;
|
||||||
stmt->isconstraint = false;
|
stmt->isconstraint = false;
|
||||||
@ -344,8 +342,6 @@ Boot_DeclareUniqueIndexStmt:
|
|||||||
stmt->idxcomment = NULL;
|
stmt->idxcomment = NULL;
|
||||||
stmt->indexOid = InvalidOid;
|
stmt->indexOid = InvalidOid;
|
||||||
stmt->oldNode = InvalidOid;
|
stmt->oldNode = InvalidOid;
|
||||||
stmt->oldCreateSubid = InvalidSubTransactionId;
|
|
||||||
stmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
|
|
||||||
stmt->unique = true;
|
stmt->unique = true;
|
||||||
stmt->primary = false;
|
stmt->primary = false;
|
||||||
stmt->isconstraint = false;
|
stmt->isconstraint = false;
|
||||||
|
@ -27,16 +27,11 @@
|
|||||||
#include "catalog/catalog.h"
|
#include "catalog/catalog.h"
|
||||||
#include "catalog/storage.h"
|
#include "catalog/storage.h"
|
||||||
#include "catalog/storage_xlog.h"
|
#include "catalog/storage_xlog.h"
|
||||||
#include "miscadmin.h"
|
|
||||||
#include "storage/freespace.h"
|
#include "storage/freespace.h"
|
||||||
#include "storage/smgr.h"
|
#include "storage/smgr.h"
|
||||||
#include "utils/hsearch.h"
|
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/rel.h"
|
#include "utils/rel.h"
|
||||||
|
|
||||||
/* GUC variables */
|
|
||||||
int wal_skip_threshold = 2048; /* in kilobytes */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We keep a list of all relations (represented as RelFileNode values)
|
* We keep a list of all relations (represented as RelFileNode values)
|
||||||
* that have been created or deleted in the current transaction. When
|
* that have been created or deleted in the current transaction. When
|
||||||
@ -66,14 +61,7 @@ typedef struct PendingRelDelete
|
|||||||
struct PendingRelDelete *next; /* linked-list link */
|
struct PendingRelDelete *next; /* linked-list link */
|
||||||
} PendingRelDelete;
|
} PendingRelDelete;
|
||||||
|
|
||||||
typedef struct pendingSync
|
|
||||||
{
|
|
||||||
RelFileNode rnode;
|
|
||||||
bool is_truncated; /* Has the file experienced truncation? */
|
|
||||||
} pendingSync;
|
|
||||||
|
|
||||||
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
|
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
|
||||||
HTAB *pendingSyncHash = NULL;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RelationCreateStorage
|
* RelationCreateStorage
|
||||||
@ -128,37 +116,6 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
|
|||||||
pending->nestLevel = GetCurrentTransactionNestLevel();
|
pending->nestLevel = GetCurrentTransactionNestLevel();
|
||||||
pending->next = pendingDeletes;
|
pending->next = pendingDeletes;
|
||||||
pendingDeletes = pending;
|
pendingDeletes = pending;
|
||||||
|
|
||||||
/*
|
|
||||||
* Queue an at-commit sync. Bootstrap does not need syncs, because initdb
|
|
||||||
* syncs at the end. During bootstrap, mdexists() creates the specified
|
|
||||||
* file; smgrDoPendingSyncs() would not cope with that.
|
|
||||||
*/
|
|
||||||
if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded() &&
|
|
||||||
!IsBootstrapProcessingMode())
|
|
||||||
{
|
|
||||||
pendingSync *pending;
|
|
||||||
bool found;
|
|
||||||
|
|
||||||
/* we sync only permanent relations */
|
|
||||||
Assert(backend == InvalidBackendId);
|
|
||||||
|
|
||||||
if (!pendingSyncHash)
|
|
||||||
{
|
|
||||||
HASHCTL ctl;
|
|
||||||
|
|
||||||
ctl.keysize = sizeof(RelFileNode);
|
|
||||||
ctl.entrysize = sizeof(pendingSync);
|
|
||||||
ctl.hcxt = TopTransactionContext;
|
|
||||||
pendingSyncHash =
|
|
||||||
hash_create("pending sync hash",
|
|
||||||
16, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
|
|
||||||
}
|
|
||||||
|
|
||||||
pending = hash_search(pendingSyncHash, &rnode, HASH_ENTER, &found);
|
|
||||||
Assert(!found);
|
|
||||||
pending->is_truncated = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -292,8 +249,6 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
|||||||
if (vm)
|
if (vm)
|
||||||
visibilitymap_truncate(rel, nblocks);
|
visibilitymap_truncate(rel, nblocks);
|
||||||
|
|
||||||
RelationPreTruncate(rel);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We WAL-log the truncation before actually truncating, which means
|
* We WAL-log the truncation before actually truncating, which means
|
||||||
* trouble if the truncation fails. If we then crash, the WAL replay
|
* trouble if the truncation fails. If we then crash, the WAL replay
|
||||||
@ -336,49 +291,6 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
|||||||
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
|
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* RelationPreTruncate
|
|
||||||
* Perform AM-independent work before a physical truncation.
|
|
||||||
*
|
|
||||||
* If an access method's relation_nontransactional_truncate does not call
|
|
||||||
* RelationTruncate(), it must call this before decreasing the table size.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
RelationPreTruncate(Relation rel)
|
|
||||||
{
|
|
||||||
pendingSync *pending;
|
|
||||||
|
|
||||||
if (!pendingSyncHash)
|
|
||||||
return;
|
|
||||||
RelationOpenSmgr(rel);
|
|
||||||
|
|
||||||
pending = hash_search(pendingSyncHash, &(rel->rd_smgr->smgr_rnode.node),
|
|
||||||
HASH_FIND, NULL);
|
|
||||||
if (pending)
|
|
||||||
pending->is_truncated = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* RelFileNodeSkippingWAL - check if a BM_PERMANENT relfilenode is using WAL
|
|
||||||
*
|
|
||||||
* Changes of certain relfilenodes must not write WAL; see "Skipping WAL for
|
|
||||||
* New RelFileNode" in src/backend/access/transam/README. Though it is
|
|
||||||
* known from Relation efficiently, this function is intended for the code
|
|
||||||
* paths not having access to Relation.
|
|
||||||
*/
|
|
||||||
bool
|
|
||||||
RelFileNodeSkippingWAL(RelFileNode rnode)
|
|
||||||
{
|
|
||||||
if (XLogIsNeeded())
|
|
||||||
return false; /* no permanent relfilenode skips WAL */
|
|
||||||
|
|
||||||
if (!pendingSyncHash ||
|
|
||||||
hash_search(pendingSyncHash, &rnode, HASH_FIND, NULL) == NULL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
|
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
|
||||||
*
|
*
|
||||||
@ -456,144 +368,6 @@ smgrDoPendingDeletes(bool isCommit)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* smgrDoPendingSyncs() -- Take care of relation syncs at end of xact.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
smgrDoPendingSyncs(bool isCommit)
|
|
||||||
{
|
|
||||||
PendingRelDelete *pending;
|
|
||||||
int nrels = 0,
|
|
||||||
maxrels = 0;
|
|
||||||
SMgrRelation *srels = NULL;
|
|
||||||
HASH_SEQ_STATUS scan;
|
|
||||||
pendingSync *pendingsync;
|
|
||||||
|
|
||||||
if (XLogIsNeeded())
|
|
||||||
return; /* no relation can use this */
|
|
||||||
|
|
||||||
Assert(GetCurrentTransactionNestLevel() == 1);
|
|
||||||
|
|
||||||
if (!pendingSyncHash)
|
|
||||||
return; /* no relation needs sync */
|
|
||||||
|
|
||||||
/* Just throw away all pending syncs if any at rollback */
|
|
||||||
if (!isCommit)
|
|
||||||
{
|
|
||||||
pendingSyncHash = NULL;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
AssertPendingSyncs_RelationCache();
|
|
||||||
|
|
||||||
/* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
|
|
||||||
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
|
|
||||||
{
|
|
||||||
if (!pending->atCommit)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
(void) hash_search(pendingSyncHash, (void *) &pending->relnode,
|
|
||||||
HASH_REMOVE, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
hash_seq_init(&scan, pendingSyncHash);
|
|
||||||
while ((pendingsync = (pendingSync *) hash_seq_search(&scan)))
|
|
||||||
{
|
|
||||||
ForkNumber fork;
|
|
||||||
BlockNumber nblocks[MAX_FORKNUM + 1];
|
|
||||||
BlockNumber total_blocks = 0;
|
|
||||||
SMgrRelation srel;
|
|
||||||
|
|
||||||
srel = smgropen(pendingsync->rnode, InvalidBackendId);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We emit newpage WAL records for smaller relations.
|
|
||||||
*
|
|
||||||
* Small WAL records have a chance to be emitted along with other
|
|
||||||
* backends' WAL records. We emit WAL records instead of syncing for
|
|
||||||
* files that are smaller than a certain threshold, expecting faster
|
|
||||||
* commit. The threshold is defined by the GUC wal_skip_threshold.
|
|
||||||
*/
|
|
||||||
if (!pendingsync->is_truncated)
|
|
||||||
{
|
|
||||||
for (fork = 0; fork <= MAX_FORKNUM; fork++)
|
|
||||||
{
|
|
||||||
if (smgrexists(srel, fork))
|
|
||||||
{
|
|
||||||
BlockNumber n = smgrnblocks(srel, fork);
|
|
||||||
|
|
||||||
/* we shouldn't come here for unlogged relations */
|
|
||||||
Assert(fork != INIT_FORKNUM);
|
|
||||||
nblocks[fork] = n;
|
|
||||||
total_blocks += n;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nblocks[fork] = InvalidBlockNumber;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Sync file or emit WAL records for its contents.
|
|
||||||
*
|
|
||||||
* Although we emit WAL record if the file is small enough, do file
|
|
||||||
* sync regardless of the size if the file has experienced a
|
|
||||||
* truncation. It is because the file would be followed by trailing
|
|
||||||
* garbage blocks after a crash recovery if, while a past longer file
|
|
||||||
* had been flushed out, we omitted syncing-out of the file and
|
|
||||||
* emitted WAL instead. You might think that we could choose WAL if
|
|
||||||
* the current main fork is longer than ever, but there's a case where
|
|
||||||
* main fork is longer than ever but FSM fork gets shorter.
|
|
||||||
*/
|
|
||||||
if (pendingsync->is_truncated ||
|
|
||||||
total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
|
|
||||||
{
|
|
||||||
/* allocate the initial array, or extend it, if needed */
|
|
||||||
if (maxrels == 0)
|
|
||||||
{
|
|
||||||
maxrels = 8;
|
|
||||||
srels = palloc(sizeof(SMgrRelation) * maxrels);
|
|
||||||
}
|
|
||||||
else if (maxrels <= nrels)
|
|
||||||
{
|
|
||||||
maxrels *= 2;
|
|
||||||
srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
|
|
||||||
}
|
|
||||||
|
|
||||||
srels[nrels++] = srel;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Emit WAL records for all blocks. The file is small enough. */
|
|
||||||
for (fork = 0; fork <= MAX_FORKNUM; fork++)
|
|
||||||
{
|
|
||||||
int n = nblocks[fork];
|
|
||||||
Relation rel;
|
|
||||||
|
|
||||||
if (!BlockNumberIsValid(n))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Emit WAL for the whole file. Unfortunately we don't know
|
|
||||||
* what kind of a page this is, so we have to log the full
|
|
||||||
* page including any unused space. ReadBufferExtended()
|
|
||||||
* counts some pgstat events; unfortunately, we discard them.
|
|
||||||
*/
|
|
||||||
rel = CreateFakeRelcacheEntry(srel->smgr_rnode.node);
|
|
||||||
log_newpage_range(rel, fork, 0, n, false);
|
|
||||||
FreeFakeRelcacheEntry(rel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pendingSyncHash = NULL;
|
|
||||||
|
|
||||||
if (nrels > 0)
|
|
||||||
{
|
|
||||||
smgrdosyncall(srels, nrels);
|
|
||||||
pfree(srels);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
|
* smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
|
||||||
*
|
*
|
||||||
|
@ -747,6 +747,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
|
|||||||
bool *isnull;
|
bool *isnull;
|
||||||
IndexScanDesc indexScan;
|
IndexScanDesc indexScan;
|
||||||
HeapScanDesc heapScan;
|
HeapScanDesc heapScan;
|
||||||
|
bool use_wal;
|
||||||
bool is_system_catalog;
|
bool is_system_catalog;
|
||||||
TransactionId OldestXmin;
|
TransactionId OldestXmin;
|
||||||
TransactionId FreezeXid;
|
TransactionId FreezeXid;
|
||||||
@ -802,9 +803,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
|
|||||||
LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
|
LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Valid smgr_targblock implies something already wrote to the relation.
|
* We need to log the copied data in WAL iff WAL archiving/streaming is
|
||||||
* This may be harmless, but this function hasn't planned for it.
|
* enabled AND it's a WAL-logged rel.
|
||||||
*/
|
*/
|
||||||
|
use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
|
||||||
|
|
||||||
|
/* use_wal off requires smgr_targblock be initially invalid */
|
||||||
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
|
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -872,7 +876,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
|
|||||||
|
|
||||||
/* Initialize the rewrite operation */
|
/* Initialize the rewrite operation */
|
||||||
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
|
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
|
||||||
MultiXactCutoff);
|
MultiXactCutoff, use_wal);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
|
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
|
||||||
@ -1242,25 +1246,6 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
|
|||||||
*mapped_tables++ = r2;
|
*mapped_tables++ = r2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Recognize that rel1's relfilenode (swapped from rel2) is new in this
|
|
||||||
* subtransaction. The rel2 storage (swapped from rel1) may or may not be
|
|
||||||
* new.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
Relation rel1,
|
|
||||||
rel2;
|
|
||||||
|
|
||||||
rel1 = relation_open(r1, NoLock);
|
|
||||||
rel2 = relation_open(r2, NoLock);
|
|
||||||
rel2->rd_createSubid = rel1->rd_createSubid;
|
|
||||||
rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
|
|
||||||
rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
|
|
||||||
RelationAssumeNewRelfilenode(rel1);
|
|
||||||
relation_close(rel1, NoLock);
|
|
||||||
relation_close(rel2, NoLock);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In the case of a shared catalog, these next few steps will only affect
|
* In the case of a shared catalog, these next few steps will only affect
|
||||||
* our own database's pg_class row; but that's okay, because they are all
|
* our own database's pg_class row; but that's okay, because they are all
|
||||||
|
@ -2310,14 +2310,49 @@ CopyFrom(CopyState cstate)
|
|||||||
|
|
||||||
tupDesc = RelationGetDescr(cstate->rel);
|
tupDesc = RelationGetDescr(cstate->rel);
|
||||||
|
|
||||||
/*
|
/*----------
|
||||||
* If the target file is new-in-transaction, we assume that checking FSM
|
* Check to see if we can avoid writing WAL
|
||||||
* for free space is a waste of time. This could possibly be wrong, but
|
*
|
||||||
* it's unlikely.
|
* If archive logging/streaming is not enabled *and* either
|
||||||
|
* - table was created in same transaction as this COPY
|
||||||
|
* - data is being written to relfilenode created in this transaction
|
||||||
|
* then we can skip writing WAL. It's safe because if the transaction
|
||||||
|
* doesn't commit, we'll discard the table (or the new relfilenode file).
|
||||||
|
* If it does commit, we'll have done the heap_sync at the bottom of this
|
||||||
|
* routine first.
|
||||||
|
*
|
||||||
|
* As mentioned in comments in utils/rel.h, the in-same-transaction test
|
||||||
|
* is not always set correctly, since in rare cases rd_newRelfilenodeSubid
|
||||||
|
* can be cleared before the end of the transaction. The exact case is
|
||||||
|
* when a relation sets a new relfilenode twice in same transaction, yet
|
||||||
|
* the second one fails in an aborted subtransaction, e.g.
|
||||||
|
*
|
||||||
|
* BEGIN;
|
||||||
|
* TRUNCATE t;
|
||||||
|
* SAVEPOINT save;
|
||||||
|
* TRUNCATE t;
|
||||||
|
* ROLLBACK TO save;
|
||||||
|
* COPY ...
|
||||||
|
*
|
||||||
|
* Also, if the target file is new-in-transaction, we assume that checking
|
||||||
|
* FSM for free space is a waste of time, even if we must use WAL because
|
||||||
|
* of archiving. This could possibly be wrong, but it's unlikely.
|
||||||
|
*
|
||||||
|
* The comments for heap_insert and RelationGetBufferForTuple specify that
|
||||||
|
* skipping WAL logging is only safe if we ensure that our tuples do not
|
||||||
|
* go into pages containing tuples from any other transactions --- but this
|
||||||
|
* must be the case if we have a new table or new relfilenode, so we need
|
||||||
|
* no additional work to enforce that.
|
||||||
|
*----------
|
||||||
*/
|
*/
|
||||||
|
/* createSubid is creation check, newRelfilenodeSubid is truncation check */
|
||||||
if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
|
if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
|
||||||
cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
|
cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
|
||||||
|
{
|
||||||
hi_options |= HEAP_INSERT_SKIP_FSM;
|
hi_options |= HEAP_INSERT_SKIP_FSM;
|
||||||
|
if (!XLogIsNeeded())
|
||||||
|
hi_options |= HEAP_INSERT_SKIP_WAL;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Optimize if new relfilenode was created in this subxact or one of its
|
* Optimize if new relfilenode was created in this subxact or one of its
|
||||||
@ -2576,6 +2611,13 @@ CopyFrom(CopyState cstate)
|
|||||||
|
|
||||||
FreeExecutorState(estate);
|
FreeExecutorState(estate);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we skipped writing WAL, then we need to sync the heap (but not
|
||||||
|
* indexes since those use WAL anyway)
|
||||||
|
*/
|
||||||
|
if (hi_options & HEAP_INSERT_SKIP_WAL)
|
||||||
|
heap_sync(cstate->rel);
|
||||||
|
|
||||||
return processed;
|
return processed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -562,13 +562,16 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
|
|||||||
myState->rel = intoRelationDesc;
|
myState->rel = intoRelationDesc;
|
||||||
myState->reladdr = intoRelationAddr;
|
myState->reladdr = intoRelationAddr;
|
||||||
myState->output_cid = GetCurrentCommandId(true);
|
myState->output_cid = GetCurrentCommandId(true);
|
||||||
myState->hi_options = HEAP_INSERT_SKIP_FSM;
|
|
||||||
myState->bistate = GetBulkInsertState();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Valid smgr_targblock implies something already wrote to the relation.
|
* We can skip WAL-logging the insertions, unless PITR or streaming
|
||||||
* This may be harmless, but this function hasn't planned for it.
|
* replication is in use. We can skip the FSM in any case.
|
||||||
*/
|
*/
|
||||||
|
myState->hi_options = HEAP_INSERT_SKIP_FSM |
|
||||||
|
(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
|
||||||
|
myState->bistate = GetBulkInsertState();
|
||||||
|
|
||||||
|
/* Not using WAL requires smgr_targblock be initially invalid */
|
||||||
Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
|
Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -614,6 +617,10 @@ intorel_shutdown(DestReceiver *self)
|
|||||||
|
|
||||||
FreeBulkInsertState(myState->bistate);
|
FreeBulkInsertState(myState->bistate);
|
||||||
|
|
||||||
|
/* If we skipped using WAL, must heap_sync before commit */
|
||||||
|
if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
|
||||||
|
heap_sync(myState->rel);
|
||||||
|
|
||||||
/* close rel, but keep lock until commit */
|
/* close rel, but keep lock until commit */
|
||||||
heap_close(myState->rel, NoLock);
|
heap_close(myState->rel, NoLock);
|
||||||
myState->rel = NULL;
|
myState->rel = NULL;
|
||||||
|
@ -436,13 +436,17 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
|
|||||||
*/
|
*/
|
||||||
myState->transientrel = transientrel;
|
myState->transientrel = transientrel;
|
||||||
myState->output_cid = GetCurrentCommandId(true);
|
myState->output_cid = GetCurrentCommandId(true);
|
||||||
myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;
|
|
||||||
myState->bistate = GetBulkInsertState();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Valid smgr_targblock implies something already wrote to the relation.
|
* We can skip WAL-logging the insertions, unless PITR or streaming
|
||||||
* This may be harmless, but this function hasn't planned for it.
|
* replication is in use. We can skip the FSM in any case.
|
||||||
*/
|
*/
|
||||||
|
myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;
|
||||||
|
if (!XLogIsNeeded())
|
||||||
|
myState->hi_options |= HEAP_INSERT_SKIP_WAL;
|
||||||
|
myState->bistate = GetBulkInsertState();
|
||||||
|
|
||||||
|
/* Not using WAL requires smgr_targblock be initially invalid */
|
||||||
Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
|
Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -482,6 +486,10 @@ transientrel_shutdown(DestReceiver *self)
|
|||||||
|
|
||||||
FreeBulkInsertState(myState->bistate);
|
FreeBulkInsertState(myState->bistate);
|
||||||
|
|
||||||
|
/* If we skipped using WAL, must heap_sync before commit */
|
||||||
|
if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
|
||||||
|
heap_sync(myState->transientrel);
|
||||||
|
|
||||||
/* close transientrel, but keep lock until commit */
|
/* close transientrel, but keep lock until commit */
|
||||||
heap_close(myState->transientrel, NoLock);
|
heap_close(myState->transientrel, NoLock);
|
||||||
myState->transientrel = NULL;
|
myState->transientrel = NULL;
|
||||||
|
@@ -4021,14 +4021,19 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		newrel = NULL;
 
 	/*
-	 * Prepare a BulkInsertState and options for heap_insert. The FSM is
-	 * empty, so don't bother using it.
+	 * Prepare a BulkInsertState and options for heap_insert. Because we're
+	 * building a new heap, we can skip WAL-logging and fsync it to disk at
+	 * the end instead (unless WAL-logging is required for archiving or
+	 * streaming replication). The FSM is empty too, so don't bother using it.
 	 */
 	if (newrel)
 	{
 		mycid = GetCurrentCommandId(true);
 		bistate = GetBulkInsertState();
 
 		hi_options = HEAP_INSERT_SKIP_FSM;
+		if (!XLogIsNeeded())
+			hi_options |= HEAP_INSERT_SKIP_WAL;
 	}
 	else
 	{
@@ -4278,6 +4283,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 	{
 		FreeBulkInsertState(bistate);
 
+		/* If we skipped writing WAL, then we need to sync the heap. */
+		if (hi_options & HEAP_INSERT_SKIP_WAL)
+			heap_sync(newrel);
+
 		heap_close(newrel, NoLock);
 	}
 }
@@ -5979,19 +5988,14 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
 
 	/*
 	 * If TryReuseIndex() stashed a relfilenode for us, we used it for the new
-	 * index instead of building from scratch. Restore associated fields.
-	 * This may store InvalidSubTransactionId in both fields, in which case
-	 * relcache.c will assume it can rebuild the relcache entry. Hence, do
-	 * this after the CCI that made catalog rows visible to any rebuild. The
-	 * DROP of the old edition of this index will have scheduled the storage
-	 * for deletion at commit, so cancel that pending deletion.
+	 * index instead of building from scratch. The DROP of the old edition of
+	 * this index will have scheduled the storage for deletion at commit, so
+	 * cancel that pending deletion.
 	 */
 	if (OidIsValid(stmt->oldNode))
 	{
 		Relation	irel = index_open(address.objectId, NoLock);
 
-		irel->rd_createSubid = stmt->oldCreateSubid;
-		irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
 		RelationPreserveStorage(irel->rd_node, true);
 		index_close(irel, NoLock);
 	}
@@ -9130,8 +9134,6 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
 		Relation	irel = index_open(oldId, NoLock);
 
 		stmt->oldNode = irel->rd_node.relNode;
-		stmt->oldCreateSubid = irel->rd_createSubid;
-		stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
 		index_close(irel, NoLock);
 	}
 }
@@ -9977,8 +9979,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
 
 	heap_close(pg_class, RowExclusiveLock);
 
-	RelationAssumeNewRelfilenode(rel);
-
 	relation_close(rel, NoLock);
 
 	/* Make sure the reltablespace change is visible */
@@ -10193,9 +10193,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
 
 	/*
 	 * We need to log the copied data in WAL iff WAL archiving/streaming is
-	 * enabled AND it's a permanent relation. This gives the same answer as
-	 * "RelationNeedsWAL(rel) || copying_initfork", because we know the
-	 * current operation created a new relfilenode.
+	 * enabled AND it's a permanent relation.
 	 */
 	use_wal = XLogIsNeeded() &&
 		(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
@@ -10237,15 +10235,21 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
 	}
 
 	/*
-	 * When we WAL-logged rel pages, we must nonetheless fsync them. The
-	 * reason is that since we're copying outside shared buffers, a CHECKPOINT
-	 * occurring during the copy has no way to flush the previously written
-	 * data to disk (indeed it won't know the new rel even exists). A crash
-	 * later on would replay WAL from the checkpoint, therefore it wouldn't
-	 * replay our earlier WAL entries. If we do not fsync those pages here,
-	 * they might still not be on disk when the crash occurs.
+	 * If the rel is WAL-logged, must fsync before commit. We use heap_sync
+	 * to ensure that the toast table gets fsync'd too. (For a temp or
+	 * unlogged rel we don't care since the data will be gone after a crash
+	 * anyway.)
+	 *
+	 * It's obvious that we must do this when not WAL-logging the copy. It's
+	 * less obvious that we have to do it even if we did WAL-log the copied
+	 * pages. The reason is that since we're copying outside shared buffers, a
+	 * CHECKPOINT occurring during the copy has no way to flush the previously
+	 * written data to disk (indeed it won't know the new rel even exists). A
+	 * crash later on would replay WAL from the checkpoint, therefore it
+	 * wouldn't replay our earlier WAL entries. If we do not fsync those pages
+	 * here, they might still not be on disk when the crash occurs.
 	 */
-	if (use_wal || copying_initfork)
+	if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
 		smgrimmedsync(dst, forkNum);
 }
 
@@ -3126,8 +3126,6 @@ _copyIndexStmt(const IndexStmt *from)
 	COPY_STRING_FIELD(idxcomment);
 	COPY_SCALAR_FIELD(indexOid);
 	COPY_SCALAR_FIELD(oldNode);
-	COPY_SCALAR_FIELD(oldCreateSubid);
-	COPY_SCALAR_FIELD(oldFirstRelfilenodeSubid);
 	COPY_SCALAR_FIELD(unique);
 	COPY_SCALAR_FIELD(primary);
 	COPY_SCALAR_FIELD(isconstraint);
@@ -1259,8 +1259,6 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
 	COMPARE_STRING_FIELD(idxcomment);
 	COMPARE_SCALAR_FIELD(indexOid);
 	COMPARE_SCALAR_FIELD(oldNode);
-	COMPARE_SCALAR_FIELD(oldCreateSubid);
-	COMPARE_SCALAR_FIELD(oldFirstRelfilenodeSubid);
 	COMPARE_SCALAR_FIELD(unique);
 	COMPARE_SCALAR_FIELD(primary);
 	COMPARE_SCALAR_FIELD(isconstraint);
@@ -2452,8 +2452,6 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
 	WRITE_STRING_FIELD(idxcomment);
 	WRITE_OID_FIELD(indexOid);
 	WRITE_OID_FIELD(oldNode);
-	WRITE_UINT_FIELD(oldCreateSubid);
-	WRITE_UINT_FIELD(oldFirstRelfilenodeSubid);
 	WRITE_BOOL_FIELD(unique);
 	WRITE_BOOL_FIELD(primary);
 	WRITE_BOOL_FIELD(isconstraint);
@@ -6664,8 +6664,6 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name
 					n->idxcomment = NULL;
 					n->indexOid = InvalidOid;
 					n->oldNode = InvalidOid;
-					n->oldCreateSubid = InvalidSubTransactionId;
-					n->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
 					n->primary = false;
 					n->isconstraint = false;
 					n->deferrable = false;
@@ -6692,8 +6690,6 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name
 					n->idxcomment = NULL;
 					n->indexOid = InvalidOid;
 					n->oldNode = InvalidOid;
-					n->oldCreateSubid = InvalidSubTransactionId;
-					n->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
 					n->primary = false;
 					n->isconstraint = false;
 					n->deferrable = false;
@@ -1121,8 +1121,6 @@ generateClonedIndexStmt(CreateStmtContext *cxt, Relation source_idx,
 	index->idxcomment = NULL;
 	index->indexOid = InvalidOid;
 	index->oldNode = InvalidOid;
-	index->oldCreateSubid = InvalidSubTransactionId;
-	index->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
 	index->unique = idxrec->indisunique;
 	index->primary = idxrec->indisprimary;
 	index->transformed = true;	/* don't need transformIndexStmt */
@@ -1588,8 +1586,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
 	index->idxcomment = NULL;
 	index->indexOid = InvalidOid;
 	index->oldNode = InvalidOid;
-	index->oldCreateSubid = InvalidSubTransactionId;
-	index->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
 	index->transformed = false;
 	index->concurrent = false;
 	index->if_not_exists = false;
@@ -65,7 +65,7 @@
 #define BUF_WRITTEN				0x01
 #define BUF_REUSABLE			0x02
 
-#define RELS_BSEARCH_THRESHOLD		20
+#define DROP_RELS_BSEARCH_THRESHOLD	20
 
 typedef struct PrivateRefCountEntry
 {
@@ -104,19 +104,6 @@ typedef struct CkptTsStatus
 	int			index;
 } CkptTsStatus;
 
-/*
- * Type for array used to sort SMgrRelations
- *
- * FlushRelationsAllBuffers shares the same comparator function with
- * DropRelFileNodesAllBuffers. Pointer to this struct and RelFileNode must be
- * compatible.
- */
-typedef struct SMgrSortArray
-{
-	RelFileNode rnode;			/* This must be the first member */
-	SMgrRelation srel;
-} SMgrSortArray;
-
 /* GUC variables */
 bool		zero_damaged_pages = false;
 int			bgwriter_lru_maxpages = 100;
@@ -2990,7 +2977,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	 * an exactly determined value, as it depends on many factors (CPU and RAM
 	 * speeds, amount of shared buffers etc.).
 	 */
-	use_bsearch = n > RELS_BSEARCH_THRESHOLD;
+	use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
 
 	/* sort the list of rnodes if necessary */
 	if (use_bsearch)
@@ -3240,104 +3227,6 @@ FlushRelationBuffers(Relation rel)
 	}
 }
 
-/* ---------------------------------------------------------------------
- *		FlushRelationsAllBuffers
- *
- *		This function flushes out of the buffer pool all the pages of all
- *		forks of the specified smgr relations. It's equivalent to calling
- *		FlushRelationBuffers once per fork per relation. The relations are
- *		assumed not to use local buffers.
- * --------------------------------------------------------------------
- */
-void
-FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
-{
-	int			i;
-	SMgrSortArray *srels;
-	bool		use_bsearch;
-
-	if (nrels == 0)
-		return;
-
-	/* fill-in array for qsort */
-	srels = palloc(sizeof(SMgrSortArray) * nrels);
-
-	for (i = 0; i < nrels; i++)
-	{
-		Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
-
-		srels[i].rnode = smgrs[i]->smgr_rnode.node;
-		srels[i].srel = smgrs[i];
-	}
-
-	/*
-	 * Save the bsearch overhead for low number of relations to sync. See
-	 * DropRelFileNodesAllBuffers for details.
-	 */
-	use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
-
-	/* sort the list of SMgrRelations if necessary */
-	if (use_bsearch)
-		pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
-
-	/* Make sure we can handle the pin inside the loop */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
-	for (i = 0; i < NBuffers; i++)
-	{
-		SMgrSortArray *srelent = NULL;
-		BufferDesc *bufHdr = GetBufferDescriptor(i);
-		uint32		buf_state;
-
-		/*
-		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
-		 * and saves some cycles.
-		 */
-
-		if (!use_bsearch)
-		{
-			int			j;
-
-			for (j = 0; j < nrels; j++)
-			{
-				if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
-				{
-					srelent = &srels[j];
-					break;
-				}
-			}
-
-		}
-		else
-		{
-			srelent = bsearch((const void *) &(bufHdr->tag.rnode),
-							  srels, nrels, sizeof(SMgrSortArray),
-							  rnode_comparator);
-		}
-
-		/* buffer doesn't belong to any of the given relfilenodes; skip it */
-		if (srelent == NULL)
-			continue;
-
-		ReservePrivateRefCountEntry();
-
-		buf_state = LockBufHdr(bufHdr);
-		if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
-			(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
-		{
-			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, srelent->srel);
-			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
-			UnpinBuffer(bufHdr, true);
-		}
-		else
-			UnlockBufHdr(bufHdr, buf_state);
-	}
-
-	pfree(srels);
-}
-
 /* ---------------------------------------------------------------------
  *		FlushDatabaseBuffers
  *
@@ -3539,15 +3428,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 		(pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
 	{
 		/*
-		 * If we must not write WAL, due to a relfilenode-specific
-		 * condition or being in recovery, don't dirty the page. We can
-		 * set the hint, just not dirty the page as a result so the hint
-		 * is lost when we evict the page or shutdown.
+		 * If we're in recovery we cannot dirty a page because of a hint.
+		 * We can set the hint, just not dirty the page as a result so the
+		 * hint is lost when we evict the page or shutdown.
 		 *
 		 * See src/backend/storage/page/README for longer discussion.
 		 */
-		if (RecoveryInProgress() ||
-			RelFileNodeSkippingWAL(bufHdr->tag.rnode))
+		if (RecoveryInProgress())
 			return;
 
 		/*
@@ -563,18 +563,6 @@ DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2)
 	return false;
 }
 
-#ifdef USE_ASSERT_CHECKING
-/*
- * GetLockMethodLocalHash -- return the hash of local locks, for modules that
- * evaluate assertions based on all locks held.
- */
-HTAB *
-GetLockMethodLocalHash(void)
-{
-	return LockMethodLocalHash;
-}
-#endif
-
 /*
  * LockHasWaiters -- look up 'locktag' and check if releasing this
  *		lock would wake up other processes waiting for it.
@@ -359,10 +359,11 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 * During replay, we would delete the file and then recreate it, which is fine
 * if the contents of the file were repopulated by subsequent WAL entries.
 * But if we didn't WAL-log insertions, but instead relied on fsyncing the
-* file after populating it (as we do at wal_level=minimal), the contents of
-* the file would be lost forever. By leaving the empty file until after the
-* next checkpoint, we prevent reassignment of the relfilenode number until
-* it's safe, because relfilenode assignment skips over any existing file.
+* file after populating it (as for instance CLUSTER and CREATE INDEX do),
+* the contents of the file would be lost forever. By leaving the empty file
+* until after the next checkpoint, we prevent reassignment of the relfilenode
+* number until it's safe, because relfilenode assignment skips over any
+* existing file.
 *
 * We do not need to go through this dance for temp relations, though, because
 * we never make WAL entries for temp rels, and so a temp rel poses no threat
|
|||||||
* mdimmedsync() -- Immediately sync a relation to stable storage.
|
* mdimmedsync() -- Immediately sync a relation to stable storage.
|
||||||
*
|
*
|
||||||
* Note that only writes already issued are synced; this routine knows
|
* Note that only writes already issued are synced; this routine knows
|
||||||
* nothing of dirty buffers that may exist inside the buffer manager. We
|
* nothing of dirty buffers that may exist inside the buffer manager.
|
||||||
* sync active and inactive segments; smgrDoPendingSyncs() relies on this.
|
|
||||||
* Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of
|
|
||||||
* some segment, then mdtruncate() renders that segment inactive. If we
|
|
||||||
* crash before the next checkpoint syncs the newly-inactive segment, that
|
|
||||||
* segment may survive recovery, reintroducing unwanted data into the table.
|
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||||
{
|
{
|
||||||
MdfdVec *v;
|
MdfdVec *v;
|
||||||
BlockNumber segno = 0;
|
|
||||||
bool active = true;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||||
@@ -1040,42 +1034,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 
 	v = mdopen(reln, forknum, EXTENSION_FAIL);
 
-	/*
-	 * Temporarily open inactive segments, then close them after sync. There
-	 * may be some inactive segments left opened after fsync() error, but that
-	 * is harmless. We don't bother to clean them up and take a risk of
-	 * further trouble. The next mdclose() will soon close them.
-	 */
 	while (v != NULL)
 	{
-		File		vfd = v->mdfd_vfd;
-
-		if (active)
-			v = v->mdfd_chain;
-		else
-		{
-			Assert(v->mdfd_chain == NULL);
-			pfree(v);
-			v = NULL;
-		}
-
-		if (FileSync(vfd) < 0)
+		if (FileSync(v->mdfd_vfd) < 0)
 			ereport(data_sync_elevel(ERROR),
 					(errcode_for_file_access(),
 					 errmsg("could not fsync file \"%s\": %m",
-							FilePathName(vfd))));
-
-		/* Close inactive segments immediately */
-		if (!active)
-			FileClose(vfd);
-
-		segno++;
-
-		if (v == NULL)
-		{
-			v = _mdfd_openseg(reln, forknum, segno, 0);
-			active = false;
-		}
+							FilePathName(v->mdfd_vfd))));
+		v = v->mdfd_chain;
 	}
 }
 
@@ -408,41 +408,6 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
 	(*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
 }
 
-/*
- * smgrdosyncall() -- Immediately sync all forks of all given relations
- *
- *		All forks of all given relations are synced out to the store.
- *
- *		This is equivalent to FlushRelationBuffers() for each smgr relation,
- *		then calling smgrimmedsync() for all forks of each relation, but it's
- *		significantly quicker so should be preferred when possible.
- */
-void
-smgrdosyncall(SMgrRelation *rels, int nrels)
-{
-	int			i = 0;
-	ForkNumber	forknum;
-
-	if (nrels == 0)
-		return;
-
-	FlushRelationsAllBuffers(rels, nrels);
-
-	/*
-	 * Sync the physical file(s).
-	 */
-	for (i = 0; i < nrels; i++)
-	{
-		int			which = rels[i]->smgr_which;
-
-		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		{
-			if (smgrsw[which].smgr_exists(rels[i], forknum))
-				smgrsw[which].smgr_immedsync(rels[i], forknum);
-		}
-	}
-}
-
 /*
  * smgrdounlinkall() -- Immediately unlink all forks of all given relations
  *
src/backend/utils/cache/relcache.c
@@ -244,9 +244,6 @@ static void RelationReloadIndexInfo(Relation relation);
 static void RelationReloadNailed(Relation relation);
 static void RelationFlushRelation(Relation relation);
 static void RememberToFreeTupleDescAtEOX(TupleDesc td);
-#ifdef USE_ASSERT_CHECKING
-static void AssertPendingSyncConsistency(Relation relation);
-#endif
 static void AtEOXact_cleanup(Relation relation, bool isCommit);
 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
 					SubTransactionId mySubid, SubTransactionId parentSubid);
@@ -984,8 +981,6 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 	relation->rd_isnailed = false;
 	relation->rd_createSubid = InvalidSubTransactionId;
 	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_droppedSubid = InvalidSubTransactionId;
 	switch (relation->rd_rel->relpersistence)
 	{
 		case RELPERSISTENCE_UNLOGGED:
@@ -1609,8 +1604,6 @@ formrdesc(const char *relationName, Oid relationReltype,
 	relation->rd_isnailed = true;
 	relation->rd_createSubid = InvalidSubTransactionId;
 	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_droppedSubid = InvalidSubTransactionId;
 	relation->rd_backend = InvalidBackendId;
 	relation->rd_islocaltemp = false;
 
@@ -1777,13 +1770,6 @@ RelationIdGetRelation(Oid relationId)
 
 	if (RelationIsValid(rd))
 	{
-		/* return NULL for dropped relations */
-		if (rd->rd_droppedSubid != InvalidSubTransactionId)
-		{
-			Assert(!rd->rd_isvalid);
-			return NULL;
-		}
-
 		RelationIncrementReferenceCount(rd);
 		/* revalidate cache entry if necessary */
 		if (!rd->rd_isvalid)
@@ -1876,7 +1862,7 @@ RelationClose(Relation relation)
 #ifdef RELCACHE_FORCE_RELEASE
 	if (RelationHasReferenceCountZero(relation) &&
 		relation->rd_createSubid == InvalidSubTransactionId &&
-		relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)
+		relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
 		RelationClearRelation(relation, false);
 #endif
 }
@@ -1915,10 +1901,9 @@ RelationReloadIndexInfo(Relation relation)
 	HeapTuple	pg_class_tuple;
 	Form_pg_class relp;
 
-	/* Should be called only for invalidated, live indexes */
+	/* Should be called only for invalidated indexes */
 	Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
-		   !relation->rd_isvalid &&
-		   relation->rd_droppedSubid == InvalidSubTransactionId);
+		   !relation->rd_isvalid);
 
 	/* Ensure it's closed at smgr level */
 	RelationCloseSmgr(relation);
@@ -2198,13 +2183,6 @@ RelationClearRelation(Relation relation, bool rebuild)
 		return;
 	}
 
-	/* Mark it invalid until we've finished rebuild */
-	relation->rd_isvalid = false;
-
-	/* See RelationForgetRelation(). */
-	if (relation->rd_droppedSubid != InvalidSubTransactionId)
-		return;
-
 	/*
 	 * Even non-system indexes should not be blown away if they are open and
 	 * have valid index support information. This avoids problems with active
@@ -2216,11 +2194,15 @@ RelationClearRelation(Relation relation, bool rebuild)
 		relation->rd_refcnt > 0 &&
 		relation->rd_indexcxt != NULL)
 	{
+		relation->rd_isvalid = false;	/* needs to be revalidated */
 		if (IsTransactionState())
 			RelationReloadIndexInfo(relation);
 		return;
 	}
 
+	/* Mark it invalid until we've finished rebuild */
+	relation->rd_isvalid = false;
+
 	/*
 	 * If we're really done with the relcache entry, blow it away. But if
 	 * someone is still using it, reconstruct the whole deal without moving
@@ -2278,12 +2260,12 @@ RelationClearRelation(Relation relation, bool rebuild)
 		 * problem.
 		 *
 		 * When rebuilding an open relcache entry, we must preserve ref count,
-		 * rd_*Subid, and rd_toastoid state. Also attempt to preserve the
-		 * pg_class entry (rd_rel), tupledesc, and rewrite-rule substructures
-		 * in place, because various places assume that these structures won't
-		 * move while they are working with an open relcache entry. (Note:
-		 * the refcount mechanism for tupledescs might someday allow us to
-		 * remove this hack for the tupledesc.)
+		 * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
+		 * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
+		 * rewrite-rule substructures in place, because various places assume
+		 * that these structures won't move while they are working with an
+		 * open relcache entry. (Note: the refcount mechanism for tupledescs
+		 * might someday allow us to remove this hack for the tupledesc.)
 		 *
 		 * Note that this process does not touch CurrentResourceOwner; which
 		 * is good because whatever ref counts the entry may have do not
@@ -2360,8 +2342,6 @@ RelationClearRelation(Relation relation, bool rebuild)
 		/* creation sub-XIDs must be preserved */
 		SWAPFIELD(SubTransactionId, rd_createSubid);
 		SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
-		SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid);
-		SWAPFIELD(SubTransactionId, rd_droppedSubid);
 		/* un-swap rd_rel pointers, swap contents instead */
 		SWAPFIELD(Form_pg_class, rd_rel);
 		/* ... but actually, we don't have to update newrel->rd_rel */
@@ -2398,12 +2378,12 @@ static void
 RelationFlushRelation(Relation relation)
 {
 	if (relation->rd_createSubid != InvalidSubTransactionId ||
-		relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+		relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
 	{
 		/*
 		 * New relcache entries are always rebuilt, not flushed; else we'd
-		 * forget the "new" status of the relation. Ditto for the
-		 * new-relfilenode status.
+		 * forget the "new" status of the relation, which is a useful
+		 * optimization to have. Ditto for the new-relfilenode status.
 		 *
 		 * The rel could have zero refcnt here, so temporarily increment the
 		 * refcnt to ensure it's safe to rebuild it. We can assume that the
@@ -2425,7 +2405,10 @@ RelationFlushRelation(Relation relation)
 }
 
 /*
- * RelationForgetRelation - caller reports that it dropped the relation
+ * RelationForgetRelation - unconditionally remove a relcache entry
+ *
+ *		   External interface for destroying a relcache entry when we
+ *		   drop the relation.
 */
 void
 RelationForgetRelation(Oid rid)
@@ -2440,19 +2423,7 @@ RelationForgetRelation(Oid rid)
 	if (!RelationHasReferenceCountZero(relation))
 		elog(ERROR, "relation %u is still open", rid);
 
-	Assert(relation->rd_droppedSubid == InvalidSubTransactionId);
-	if (relation->rd_createSubid != InvalidSubTransactionId ||
-		relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
-	{
-		/*
-		 * In the event of subtransaction rollback, we must not forget
-		 * rd_*Subid. Mark the entry "dropped" so RelationClearRelation()
-		 * invalidates it in lieu of destroying it. (If we're in a top
-		 * transaction, we could opt to destroy the entry.)
-		 */
-		relation->rd_droppedSubid = GetCurrentSubTransactionId();
-	}
-
+	/* Unconditionally destroy the relcache entry */
 	RelationClearRelation(relation, false);
 }
 
@@ -2492,10 +2463,11 @@ RelationCacheInvalidateEntry(Oid relationId)
 *	 relation cache and re-read relation mapping data.
 *
 *	 This is currently used only to recover from SI message buffer overflow,
-*	 so we do not touch relations having new-in-transaction relfilenodes; they
-*	 cannot be targets of cross-backend SI updates (and our own updates now go
-*	 through a separate linked list that isn't limited by the SI message
-*	 buffer size).
+*	 so we do not touch new-in-transaction relations; they cannot be targets
+*	 of cross-backend SI updates (and our own updates now go through a
+*	 separate linked list that isn't limited by the SI message buffer size).
+*	 Likewise, we need not discard new-relfilenode-in-transaction hints,
+*	 since any invalidation of those would be a local event.
 *
 *	 We do this in two phases: the first pass deletes deletable items, and
 *	 the second one rebuilds the rebuildable items. This is essential for
@@ -2546,7 +2518,7 @@ RelationCacheInvalidate(void)
 		 * pending invalidations.
 		 */
 		if (relation->rd_createSubid != InvalidSubTransactionId ||
-			relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+			relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
 			continue;
 
 		relcacheInvalsReceived++;
@@ -2658,96 +2630,6 @@ RememberToFreeTupleDescAtEOX(TupleDesc td)
 	EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
 }
 
-#ifdef USE_ASSERT_CHECKING
-/*
- * Relation kinds that have physical storage. These relations normally have
- * relfilenode set to non-zero, but it can also be zero if the relation is
- * mapped.
- */
-#define RELKIND_HAS_STORAGE(relkind) \
-	((relkind) == RELKIND_RELATION || \
-	 (relkind) == RELKIND_INDEX || \
-	 (relkind) == RELKIND_SEQUENCE || \
-	 (relkind) == RELKIND_TOASTVALUE || \
-	 (relkind) == RELKIND_MATVIEW)
-
-static void
-AssertPendingSyncConsistency(Relation relation)
-{
-	bool		relcache_verdict =
-		relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&
-		((relation->rd_createSubid != InvalidSubTransactionId &&
-		  RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
-		 relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId);
-
-	Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node));
-
-	if (relation->rd_droppedSubid != InvalidSubTransactionId)
-		Assert(!relation->rd_isvalid &&
-			   (relation->rd_createSubid != InvalidSubTransactionId ||
-				relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId));
-}
-
-/*
- * AssertPendingSyncs_RelationCache
- *
- *	Assert that relcache.c and storage.c agree on whether to skip WAL.
- */
-void
-AssertPendingSyncs_RelationCache(void)
-{
-	HASH_SEQ_STATUS status;
-	LOCALLOCK  *locallock;
-	Relation   *rels;
-	int			maxrels;
-	int			nrels;
-	RelIdCacheEnt *idhentry;
-	int			i;
-
-	/*
-	 * Open every relation that this transaction has locked. If, for some
-	 * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
-	 * a CommandCounterIncrement() typically yields a local invalidation
-	 * message that destroys the relcache entry. By recreating such entries
-	 * here, we detect the problem.
-	 */
-	PushActiveSnapshot(GetTransactionSnapshot());
-	maxrels = 1;
-	rels = palloc(maxrels * sizeof(*rels));
-	nrels = 0;
-	hash_seq_init(&status, GetLockMethodLocalHash());
-	while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
-	{
-		Oid			relid;
-		Relation	r;
-
-		if (locallock->nLocks <= 0)
-			continue;
-		if ((LockTagType) locallock->tag.lock.locktag_type !=
-			LOCKTAG_RELATION)
-			continue;
-		relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2);
-		r = RelationIdGetRelation(relid);
-		if (!RelationIsValid(r))
-			continue;
-		if (nrels >= maxrels)
-		{
-			maxrels *= 2;
-			rels = repalloc(rels, maxrels * sizeof(*rels));
-		}
-		rels[nrels++] = r;
-	}
-
-	hash_seq_init(&status, RelationIdCache);
-	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
-		AssertPendingSyncConsistency(idhentry->reldesc);
-
-	for (i = 0; i < nrels; i++)
-		RelationClose(rels[i]);
-	PopActiveSnapshot();
-}
-#endif
-
 /*
  * AtEOXact_RelationCache
  *
@@ -2830,8 +2712,6 @@ AtEOXact_RelationCache(bool isCommit)
 static void
 AtEOXact_cleanup(Relation relation, bool isCommit)
 {
-	bool		clear_relcache = false;
-
 	/*
 	 * The relcache entry's ref count should be back to its normal
 	 * not-in-a-transaction state: 0 unless it's nailed in cache.
@@ -2857,31 +2737,17 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
 #endif
 
 	/*
-	 * Is the relation live after this transaction ends?
+	 * Is it a relation created in the current transaction?
 	 *
-	 * During commit, clear the relcache entry if it is preserved after
-	 * relation drop, in order not to orphan the entry. During rollback,
-	 * clear the relcache entry if the relation is created in the current
-	 * transaction since it isn't interesting any longer once we are out of
-	 * the transaction.
+	 * During commit, reset the flag to zero, since we are now out of the
+	 * creating transaction. During abort, simply delete the relcache entry
+	 * --- it isn't interesting any longer.
 	 */
-	clear_relcache =
-		(isCommit ?
-		 relation->rd_droppedSubid != InvalidSubTransactionId :
-		 relation->rd_createSubid != InvalidSubTransactionId);
-
-	/*
-	 * Since we are now out of the transaction, reset the subids to zero.
-	 * That also lets RelationClearRelation() drop the relcache entry.
-	 */
-	relation->rd_createSubid = InvalidSubTransactionId;
-	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-	relation->rd_droppedSubid = InvalidSubTransactionId;
-
-	if (clear_relcache)
+	if (relation->rd_createSubid != InvalidSubTransactionId)
 	{
-		if (RelationHasReferenceCountZero(relation))
+		if (isCommit)
+			relation->rd_createSubid = InvalidSubTransactionId;
+		else if (RelationHasReferenceCountZero(relation))
 		{
 			RelationClearRelation(relation, false);
 			return;
@@ -2896,11 +2762,17 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
 			 * eventually. This must be just a WARNING to avoid
 			 * error-during-error-recovery loops.
 			 */
+			relation->rd_createSubid = InvalidSubTransactionId;
 			elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
 				 RelationGetRelationName(relation));
 		}
 	}
 
+	/*
+	 * Likewise, reset the hint about the relfilenode being new.
+	 */
+	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
+
 	/*
 	 * Flush any temporary index list.
 	 */
@@ -2975,28 +2847,15 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
 	/*
 	 * Is it a relation created in the current subtransaction?
 	 *
-	 * During subcommit, mark it as belonging to the parent, instead, as long
-	 * as it has not been dropped. Otherwise simply delete the relcache entry.
-	 * --- it isn't interesting any longer.
+	 * During subcommit, mark it as belonging to the parent, instead. During
+	 * subabort, simply delete the relcache entry.
 	 */
 	if (relation->rd_createSubid == mySubid)
 	{
-		/*
-		 * Valid rd_droppedSubid means the corresponding relation is dropped
-		 * but the relcache entry is preserved for at-commit pending sync. We
-		 * need to drop it explicitly here not to make the entry orphan.
-		 */
-		Assert(relation->rd_droppedSubid == mySubid ||
-			   relation->rd_droppedSubid == InvalidSubTransactionId);
-		if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
+		if (isCommit)
 			relation->rd_createSubid = parentSubid;
 		else if (RelationHasReferenceCountZero(relation))
 		{
-			/* allow the entry to be removed */
-			relation->rd_createSubid = InvalidSubTransactionId;
-			relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-			relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-			relation->rd_droppedSubid = InvalidSubTransactionId;
 			RelationClearRelation(relation, false);
 			return;
 		}
@@ -3016,8 +2875,7 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
 	}
 
 	/*
-	 * Likewise, update or drop any new-relfilenode-in-subtransaction record
-	 * or drop record.
+	 * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
 	 */
 	if (relation->rd_newRelfilenodeSubid == mySubid)
 	{
@@ -3027,22 +2885,6 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
 			relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
 	}
 
-	if (relation->rd_firstRelfilenodeSubid == mySubid)
-	{
-		if (isCommit)
-			relation->rd_firstRelfilenodeSubid = parentSubid;
-		else
-			relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-	}
-
-	if (relation->rd_droppedSubid == mySubid)
-	{
-		if (isCommit)
-			relation->rd_droppedSubid = parentSubid;
-		else
-			relation->rd_droppedSubid = InvalidSubTransactionId;
-	}
-
 	/*
 	 * Flush any temporary index list.
 	 */
@@ -3142,7 +2984,6 @@ RelationBuildLocalRelation(const char *relname,
 	/* it's being created in this transaction */
 	rel->rd_createSubid = GetCurrentSubTransactionId();
 	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-	rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
 
 	/*
 	 * create a new tuple descriptor from the one passed in. We do this
@@ -3413,29 +3254,14 @@ RelationSetNewRelfilenode(Relation relation, char persistence,
 	 */
 	CommandCounterIncrement();
 
-	RelationAssumeNewRelfilenode(relation);
-}
-
 	/*
-	 * RelationAssumeNewRelfilenode
-	 *
-	 * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
-	 * this. The call shall precede any code that might insert WAL records whose
-	 * replay would modify bytes in the new RelFileNode, and the call shall follow
-	 * any WAL modifying bytes in the prior RelFileNode. See struct RelationData.
-	 * Ideally, call this as near as possible to the CommandCounterIncrement()
-	 * that makes the pg_class change visible (before it or after it); that
-	 * minimizes the chance of future development adding a forbidden WAL insertion
-	 * between RelationAssumeNewRelfilenode() and CommandCounterIncrement().
+	 * Mark the rel as having been given a new relfilenode in the current
+	 * (sub) transaction. This is a hint that can be used to optimize later
+	 * operations on the rel in the same transaction.
 	 */
-void
-RelationAssumeNewRelfilenode(Relation relation)
-{
 	relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
-	if (relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)
-		relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
 
-	/* Flag relation as needing eoxact cleanup (to clear these fields) */
+	/* Flag relation as needing eoxact cleanup (to remove the hint) */
 	EOXactListAdd(relation);
 }
 
@@ -5434,8 +5260,6 @@ load_relcache_init_file(bool shared)
 		rel->rd_idattr = NULL;
 		rel->rd_createSubid = InvalidSubTransactionId;
 		rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-		rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-		rel->rd_droppedSubid = InvalidSubTransactionId;
 		rel->rd_amcache = NULL;
 		MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
 
@@ -32,7 +32,6 @@
 #include "access/twophase.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
-#include "catalog/storage.h"
 #include "commands/async.h"
 #include "commands/prepare.h"
 #include "commands/vacuum.h"
@@ -2323,17 +2322,6 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
-	{
-		{"wal_skip_threshold", PGC_USERSET, WAL_SETTINGS,
-			gettext_noop("Size of new file to fsync instead of writing WAL."),
-			NULL,
-			GUC_UNIT_KB
-		},
-		&wal_skip_threshold,
-		2048, 0, MAX_KILOBYTES,
-		NULL, NULL, NULL
-	},
-
 	{
 		/* see max_connections */
 		{"max_wal_senders", PGC_POSTMASTER, REPLICATION_SENDING,
@@ -197,7 +197,6 @@
 					# (change requires restart)
 #wal_writer_delay = 200ms		# 1-10000 milliseconds
 #wal_writer_flush_after = 1MB		# measured in pages, 0 disables
-#wal_skip_threshold = 2MB
 
 #commit_delay = 0			# range 0-100000, in microseconds
 #commit_siblings = 5			# range 1-1000
@@ -189,7 +189,6 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
 /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */	/* not used anymore */
 #define XLOG_GIST_CREATE_INDEX		0x50
 /* #define XLOG_GIST_PAGE_DELETE 0x60 */	/* not used anymore */
-#define XLOG_GIST_ASSIGN_LSN		0x70	/* nop, assign new LSN */
 
 /*
  * Backup Blk 0: updated page.
@@ -478,8 +477,6 @@ extern XLogRecPtr gistXLogSplit(bool page_is_leaf,
 			  BlockNumber origrlink, GistNSN oldnsn,
 			  Buffer leftchild, bool markfollowright);
 
-extern XLogRecPtr gistXLogAssignLSN(void);
-
 /* gistget.c */
 extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir);
 extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
@@ -25,6 +25,7 @@
 
 
 /* "options" flag bits for heap_insert */
+#define HEAP_INSERT_SKIP_WAL	0x0001
 #define HEAP_INSERT_SKIP_FSM	0x0002
 #define HEAP_INSERT_FROZEN		0x0004
 #define HEAP_INSERT_SPECULATIVE 0x0008
@@ -23,7 +23,7 @@ typedef struct RewriteStateData *RewriteState;
 
 extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
 				   TransactionId OldestXmin, TransactionId FreezeXid,
-				   MultiXactId MultiXactCutoff);
+				   MultiXactId MultiXactCutoff, bool use_wal);
 extern void end_heap_rewrite(RewriteState state);
 extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
 				   HeapTuple newTuple);
@@ -18,22 +18,16 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
-/* GUC variables */
-extern int	wal_skip_threshold;
-
 extern void RelationCreateStorage(RelFileNode rnode, char relpersistence);
 extern void RelationDropStorage(Relation rel);
 extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit);
-extern void RelationPreTruncate(Relation rel);
 extern void RelationTruncate(Relation rel, BlockNumber nblocks);
-extern bool RelFileNodeSkippingWAL(RelFileNode rnode);
 
 /*
  * These functions used to be in storage/smgr/smgr.c, which explains the
  * naming
 */
 extern void smgrDoPendingDeletes(bool isCommit);
-extern void smgrDoPendingSyncs(bool isCommit);
 extern int	smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
 extern void AtSubCommit_smgr(void);
 extern void AtSubAbort_smgr(void);
@@ -2454,9 +2454,6 @@ typedef struct IndexStmt
 	bool		transformed;	/* true when transformIndexStmt is finished */
 	bool		concurrent;		/* should this be a concurrent index build? */
 	bool		if_not_exists;	/* just do nothing if index already exists? */
-	SubTransactionId oldCreateSubid;	/* rd_createSubid of oldNode */
-	SubTransactionId oldFirstRelfilenodeSubid;	/* rd_firstRelfilenodeSubid of
-												 * oldNode */
 } IndexStmt;
 
 /* ----------------------
@@ -50,9 +50,6 @@ typedef enum
 /* forward declared, to avoid having to expose buf_internals.h here */
 struct WritebackContext;
 
-/* forward declared, to avoid including smgr.h here */
-struct SMgrRelationData;
-
 /* in globals.c ... this duplicates miscadmin.h */
 extern PGDLLIMPORT int NBuffers;
 
@@ -193,7 +190,6 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
 				   ForkNumber forkNum);
 extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
-extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
 				   ForkNumber forkNum, BlockNumber firstDelBlock);
@@ -541,9 +541,6 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
 extern void LockReleaseSession(LOCKMETHODID lockmethodid);
 extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
 extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
-#ifdef USE_ASSERT_CHECKING
-extern HTAB *GetLockMethodLocalHash(void);
-#endif
 extern bool LockHasWaiters(const LOCKTAG *locktag,
 				LOCKMODE lockmode, bool sessionLock);
 extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
@@ -87,7 +87,6 @@ extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNodeBackend rnode);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdounlink(SMgrRelation reln, bool isRedo);
-extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
 extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
@@ -65,43 +65,25 @@ typedef struct RelationData
 
 	/*----------
 	 * rd_createSubid is the ID of the highest subtransaction the rel has
-	 * survived into or zero if the rel or its rd_node was created before the
-	 * current top transaction. (IndexStmt.oldNode leads to the case of a new
-	 * rel with an old rd_node.) rd_firstRelfilenodeSubid is the ID of the
-	 * highest subtransaction an rd_node change has survived into or zero if
-	 * rd_node matches the value it had at the start of the current top
-	 * transaction. (Rolling back the subtransaction that
-	 * rd_firstRelfilenodeSubid denotes would restore rd_node to the value it
-	 * had at the start of the current top transaction. Rolling back any
-	 * lower subtransaction would not.) Their accuracy is critical to
-	 * RelationNeedsWAL().
-	 *
-	 * rd_newRelfilenodeSubid is the ID of the highest subtransaction the
-	 * most-recent relfilenode change has survived into or zero if not changed
-	 * in the current transaction (or we have forgotten changing it). This
-	 * field is accurate when non-zero, but it can be zero when a relation has
-	 * multiple new relfilenodes within a single transaction, with one of them
-	 * occurring in a subsequently aborted subtransaction, e.g.
+	 * survived into; or zero if the rel was not created in the current top
+	 * transaction. This can be now be relied on, whereas previously it could
+	 * be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
+	 * the ID of the highest subtransaction the relfilenode change has
+	 * survived into, or zero if not changed in the current transaction (or we
+	 * have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
+	 * when a relation has multiple new relfilenodes within a single
+	 * transaction, with one of them occurring in a subsequently aborted
+	 * subtransaction, e.g.
 	 *		BEGIN;
 	 *		TRUNCATE t;
 	 *		SAVEPOINT save;
 	 *		TRUNCATE t;
 	 *		ROLLBACK TO save;
 	 *		-- rd_newRelfilenodeSubid is now forgotten
-	 *
-	 * If every rd_*Subid field is zero, they are read-only outside
-	 * relcache.c. Files that trigger rd_node changes by updating
-	 * pg_class.reltablespace and/or pg_class.relfilenode call
-	 * RelationAssumeNewRelfilenode() to update rd_*Subid.
-	 *
-	 * rd_droppedSubid is the ID of the highest subtransaction that a drop of
-	 * the rel has survived into. In entries visible outside relcache.c, this
-	 * is always zero.
 	 */
 	SubTransactionId rd_createSubid;	/* rel was created in current xact */
-	SubTransactionId rd_newRelfilenodeSubid;	/* highest subxact changing
-												 * rd_node to current value */
-	/* see end for rd_firstRelfilenodeSubid and rd_droppedSubid */
+	SubTransactionId rd_newRelfilenodeSubid;	/* new relfilenode assigned in
+												 * current xact */
 
 	Form_pg_class rd_rel;		/* RELATION tuple */
 	TupleDesc	rd_att;			/* tuple descriptor */
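(Editorial aside, not part of the diff: the removed half of the comment above states that rolling back the subtransaction recorded in rd_firstRelfilenodeSubid restores rd_node to its value from the start of the top transaction. A minimal SQL sketch of that situation, assuming a pre-existing table "t" and tablespace "other", neither of which comes from this commit:)

-- Illustrative only; table "t" and tablespace "other" are assumed to exist.
BEGIN;
SAVEPOINT s;
ALTER TABLE t SET TABLESPACE other;  -- relfilenode changes under subxact s
ROLLBACK TO s;                       -- rd_node reverts to its top-level value
COMMIT;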
@@ -195,10 +177,6 @@ typedef struct RelationData
 
 	/* use "struct" here to avoid needing to include pgstat.h: */
 	struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
-
-	SubTransactionId rd_firstRelfilenodeSubid;	/* highest subxact changing
-												 * rd_node to any value */
-	SubTransactionId rd_droppedSubid;	/* dropped with another Subid set */
 } RelationData;
 
 
@@ -485,16 +463,9 @@ typedef struct ViewOptions
 /*
  * RelationNeedsWAL
  *		True if relation needs WAL.
- *
- * Returns false if wal_level = minimal and this relation is created or
- * truncated in the current transaction. See "Skipping WAL for New
- * RelFileNode" in src/backend/access/transam/README.
  */
 #define RelationNeedsWAL(relation) \
-	((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \
-	 (XLogIsNeeded() || \
-	  (relation->rd_createSubid == InvalidSubTransactionId && \
-	   relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)))
+	((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
 
 /*
  * RelationUsesLocalBuffers
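(Editorial aside, not part of the diff: the branch deleted from RelationNeedsWAL() above is what let wal_level = minimal skip WAL for a permanent relation whose relfilenode was created or changed in the current transaction; after the revert the macro depends on relpersistence alone. A hedged SQL sketch of the kind of transaction that branch targeted, assuming wal_level = minimal and a CSV file at the hypothetical path shown:)

-- Illustrative only; assumes wal_level = minimal and that /tmp/bulk.csv exists.
BEGIN;
CREATE TABLE bulk_load (id int, payload text);
COPY bulk_load FROM '/tmp/bulk.csv' WITH (FORMAT csv);  -- eligible to skip WAL before this revert
COMMIT;  -- the new data is made durable at commit instead of record by record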
@@ -95,11 +95,10 @@ extern Relation RelationBuildLocalRelation(const char *relname,
 				   char relkind);
 
 /*
- * Routines to manage assignment of new relfilenode to a relation
+ * Routine to manage assignment of new relfilenode to a relation
  */
 extern void RelationSetNewRelfilenode(Relation relation, char persistence,
 				   TransactionId freezeXid, MultiXactId minmulti);
-extern void RelationAssumeNewRelfilenode(Relation relation);
 
 /*
  * Routines for flushing/rebuilding relcache entries in various scenarios
@@ -112,11 +111,6 @@ extern void RelationCacheInvalidate(void);
 
 extern void RelationCloseSmgrByOid(Oid relationId);
 
-#ifdef USE_ASSERT_CHECKING
-extern void AssertPendingSyncs_RelationCache(void);
-#else
-#define AssertPendingSyncs_RelationCache() do {} while (0)
-#endif
 extern void AtEOXact_RelationCache(bool isCommit);
 extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
 				   SubTransactionId parentSubid);
@@ -1,372 +0,0 @@
-# Test WAL replay when some operation has skipped WAL.
-#
-# These tests exercise code that once violated the mandate described in
-# src/backend/access/transam/README section "Skipping WAL for New
-# RelFileNode". The tests work by committing some transactions, initiating an
-# immediate shutdown, and confirming that the expected data survives recovery.
-# For many years, individual commands made the decision to skip WAL, hence the
-# frequent appearance of COPY in these tests.
-use strict;
-use warnings;
-
-use PostgresNode;
-use TestLib;
-use Test::More tests => 34;
-
-sub check_orphan_relfilenodes
-{
-    my ($node, $test_name) = @_;
-
-    my $db_oid = $node->safe_psql('postgres',
-        "SELECT oid FROM pg_database WHERE datname = 'postgres'");
-    my $prefix = "base/$db_oid/";
-    my $filepaths_referenced = $node->safe_psql(
-        'postgres', "
-        SELECT pg_relation_filepath(oid) FROM pg_class
-        WHERE reltablespace = 0 AND relpersistence <> 't' AND
-        pg_relation_filepath(oid) IS NOT NULL;");
-    is_deeply(
-        [
-            sort(map { "$prefix$_" }
-                grep(/^[0-9]+$/, slurp_dir($node->data_dir . "/$prefix")))
-        ],
-        [ sort split /\n/, $filepaths_referenced ],
-        $test_name);
-    return;
-}
-
-# We run this same test suite for both wal_level=minimal and replica.
-sub run_wal_optimize
-{
-    my $wal_level = shift;
-
-    my $node = get_new_node("node_$wal_level");
-    $node->init;
-    $node->append_conf(
-        'postgresql.conf', qq(
-wal_level = $wal_level
-max_prepared_transactions = 1
-wal_log_hints = on
-wal_skip_threshold = 0
-#wal_debug = on
-));
-    $node->start;
-
-    # Setup
-    my $tablespace_dir = $node->basedir . '/tablespace_other';
-    mkdir($tablespace_dir);
-    $tablespace_dir = TestLib::perl2host($tablespace_dir);
-    $node->safe_psql('postgres',
-        "CREATE TABLESPACE other LOCATION '$tablespace_dir';");
-
-    # Test direct truncation optimization. No tuples.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE trunc (id serial PRIMARY KEY);
-        TRUNCATE trunc;
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
-    is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
-
-    # Test truncation with inserted tuples within the same transaction.
-    # Tuples inserted after the truncation should be seen.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE trunc_ins (id serial PRIMARY KEY);
-        INSERT INTO trunc_ins VALUES (DEFAULT);
-        TRUNCATE trunc_ins;
-        INSERT INTO trunc_ins VALUES (DEFAULT);
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres',
-        "SELECT count(*), min(id) FROM trunc_ins;");
-    is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT");
-
-    # Same for prepared transaction.
-    # Tuples inserted after the truncation should be seen.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE twophase (id serial PRIMARY KEY);
-        INSERT INTO twophase VALUES (DEFAULT);
-        TRUNCATE twophase;
-        INSERT INTO twophase VALUES (DEFAULT);
-        PREPARE TRANSACTION 't';
-        COMMIT PREPARED 't';");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres',
-        "SELECT count(*), min(id) FROM trunc_ins;");
-    is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE");
-
-    # Writing WAL at end of xact, instead of syncing.
-    $node->safe_psql(
-        'postgres', "
-        SET wal_skip_threshold = '1TB';
-        BEGIN;
-        CREATE TABLE noskip (id serial PRIMARY KEY);
-        INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ;
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;");
-    is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL");
-
-    # Data file for COPY query in subsequent tests
-    my $basedir = $node->basedir;
-    my $copy_file = "$basedir/copy_data.txt";
-    TestLib::append_to_file(
-        $copy_file, qq(20000,30000
-20001,30001
-20002,30002));
-
-    # Test truncation with inserted tuples using both INSERT and COPY. Tuples
-    # inserted after the truncation should be seen.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);
-        INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));
-        TRUNCATE ins_trunc;
-        INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
-        COPY ins_trunc FROM '$copy_file' DELIMITER ',';
-        INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;");
-    is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT");
-
-    # Test truncation with inserted tuples using COPY. Tuples copied after
-    # the truncation should be seen.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);
-        INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));
-        TRUNCATE trunc_copy;
-        COPY trunc_copy FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result =
-      $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;");
-    is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY");
-
-    # Like previous test, but rollback SET TABLESPACE in a subtransaction.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);
-        INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));
-        TRUNCATE spc_abort;
-        SAVEPOINT s;
-        ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;
-        COPY spc_abort FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;");
-    is($result, qq(3),
-        "wal_level = $wal_level, SET TABLESPACE abort subtransaction");
-
-    # in different subtransaction patterns
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);
-        INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));
-        TRUNCATE spc_commit;
-        SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;
-        COPY spc_commit FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result =
-      $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;");
-    is($result, qq(3),
-        "wal_level = $wal_level, SET TABLESPACE commit subtransaction");
-
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);
-        INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));
-        TRUNCATE spc_nest;
-        SAVEPOINT s;
-        ALTER TABLE spc_nest SET TABLESPACE other;
-        SAVEPOINT s2;
-        ALTER TABLE spc_nest SET TABLESPACE pg_default;
-        ROLLBACK TO s2;
-        SAVEPOINT s2;
-        ALTER TABLE spc_nest SET TABLESPACE pg_default;
-        RELEASE s2;
-        ROLLBACK TO s;
-        COPY spc_nest FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;");
-    is($result, qq(3),
-        "wal_level = $wal_level, SET TABLESPACE nested subtransaction");
-
-    $node->safe_psql(
-        'postgres', "
-        CREATE TABLE spc_hint (id int);
-        INSERT INTO spc_hint VALUES (1);
-        BEGIN;
-        ALTER TABLE spc_hint SET TABLESPACE other;
-        CHECKPOINT;
-        SELECT * FROM spc_hint; -- set hint bit
-        INSERT INTO spc_hint VALUES (2);
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;");
-    is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit");
-
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE idx_hint (c int PRIMARY KEY);
-        SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;
-        CHECKPOINT;
-        INSERT INTO idx_hint VALUES (1); -- set index hint bit
-        INSERT INTO idx_hint VALUES (2);
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->psql('postgres',);
-    my ($ret, $stdout, $stderr) =
-      $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);");
-    is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD");
-    like(
-        $stderr,
-        qr/violates unique/,
-        "wal_level = $wal_level, unique index LP_DEAD message");
-
-    # UPDATE touches two buffers for one row.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE upd (id serial PRIMARY KEY, id2 int);
-        INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));
-        COPY upd FROM '$copy_file' DELIMITER ',';
-        UPDATE upd SET id2 = id2 + 1;
-        DELETE FROM upd;
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;");
-    is($result, qq(0),
-        "wal_level = $wal_level, UPDATE touches two buffers for one row");
-
-    # Test consistency of COPY with INSERT for table created in the same
-    # transaction.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);
-        INSERT INTO ins_copy VALUES (DEFAULT, 1);
-        COPY ins_copy FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;");
-    is($result, qq(4), "wal_level = $wal_level, INSERT COPY");
-
-    # Test consistency of COPY that inserts more to the same table using
-    # triggers. If the INSERTS from the trigger go to the same block data
-    # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when
-    # it tries to replay the WAL record but the "before" image doesn't match,
-    # because not all changes were WAL-logged.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text);
-        CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger
-          LANGUAGE plpgsql as \$\$
-          BEGIN
-            IF new.id2 NOT LIKE 'triggered%' THEN
-              INSERT INTO ins_trig
-                VALUES (DEFAULT, 'triggered row before' || NEW.id2);
-            END IF;
-            RETURN NEW;
-          END; \$\$;
-        CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger
-          LANGUAGE plpgsql as \$\$
-          BEGIN
-            IF new.id2 NOT LIKE 'triggered%' THEN
-              INSERT INTO ins_trig
-                VALUES (DEFAULT, 'triggered row after' || NEW.id2);
-            END IF;
-            RETURN NEW;
-          END; \$\$;
-        CREATE TRIGGER ins_trig_before_row_insert
-          BEFORE INSERT ON ins_trig
-          FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig();
-        CREATE TRIGGER ins_trig_after_row_insert
-          AFTER INSERT ON ins_trig
-          FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig();
-        COPY ins_trig FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;");
-    is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers");
-
-    # Test consistency of INSERT, COPY and TRUNCATE in same transaction block
-    # with TRUNCATE triggers.
-    $node->safe_psql(
-        'postgres', "
-        BEGIN;
-        CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text);
-        CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger
-          LANGUAGE plpgsql as \$\$
-          BEGIN
-            INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
-            RETURN NULL;
-          END; \$\$;
-        CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger
-          LANGUAGE plpgsql as \$\$
-          BEGIN
-            INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
-            RETURN NULL;
-          END; \$\$;
-        CREATE TRIGGER trunc_trig_before_stat_truncate
-          BEFORE TRUNCATE ON trunc_trig
-          FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig();
-        CREATE TRIGGER trunc_trig_after_stat_truncate
-          AFTER TRUNCATE ON trunc_trig
-          FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig();
-        INSERT INTO trunc_trig VALUES (DEFAULT, 1);
-        TRUNCATE trunc_trig;
-        COPY trunc_trig FROM '$copy_file' DELIMITER ',';
-        COMMIT;");
-    $node->stop('immediate');
-    $node->start;
-    $result =
-      $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;");
-    is($result, qq(4),
-        "wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers");
-
-    # Test redo of temp table creation.
-    $node->safe_psql(
-        'postgres', "
-        CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);");
-    $node->stop('immediate');
-    $node->start;
-    check_orphan_relfilenodes($node,
-        "wal_level = $wal_level, no orphan relfilenode remains");
-
-    return;
-}
-
-# Run same test suite for multiple wal_level values.
-run_wal_optimize("minimal");
-run_wal_optimize("replica");
@@ -2016,12 +2016,6 @@ select * from another;
 (3 rows)
 
 drop table another;
--- Create an index that skips WAL, then perform a SET DATA TYPE that skips
--- rewriting the index.
-begin;
-create table skip_wal_skip_rewrite_index (c varchar(10) primary key);
-alter table skip_wal_skip_rewrite_index alter c type varchar(20);
-commit;
 -- table's row type
 create table tab1 (a int, b text);
 create table tab2 (x int, y tab1);
@@ -267,16 +267,3 @@ DEALLOCATE select1;
 -- check that the oid column is added before the primary key is checked
 CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS;
 DROP TABLE oid_pk;
--- Verify that subtransaction rollback restores rd_createSubid.
-BEGIN;
-CREATE TABLE remember_create_subid (c int);
-SAVEPOINT q; DROP TABLE remember_create_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_create_subid;
--- Verify that subtransaction rollback restores rd_firstRelfilenodeSubid.
-CREATE TABLE remember_node_subid (c int);
-BEGIN;
-ALTER TABLE remember_node_subid ALTER c TYPE bigint;
-SAVEPOINT q; DROP TABLE remember_node_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_node_subid;
@@ -1348,13 +1348,6 @@ select * from another;
 
 drop table another;
 
--- Create an index that skips WAL, then perform a SET DATA TYPE that skips
--- rewriting the index.
-begin;
-create table skip_wal_skip_rewrite_index (c varchar(10) primary key);
-alter table skip_wal_skip_rewrite_index alter c type varchar(20);
-commit;
-
 -- table's row type
 create table tab1 (a int, b text);
 create table tab2 (x int, y tab1);
@@ -277,18 +277,3 @@ DEALLOCATE select1;
 -- check that the oid column is added before the primary key is checked
 CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS;
 DROP TABLE oid_pk;
-
--- Verify that subtransaction rollback restores rd_createSubid.
-BEGIN;
-CREATE TABLE remember_create_subid (c int);
-SAVEPOINT q; DROP TABLE remember_create_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_create_subid;
-
--- Verify that subtransaction rollback restores rd_firstRelfilenodeSubid.
-CREATE TABLE remember_node_subid (c int);
-BEGIN;
-ALTER TABLE remember_node_subid ALTER c TYPE bigint;
-SAVEPOINT q; DROP TABLE remember_node_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_node_subid;