
Fix up rickety handling of relation-truncation interlocks.

Move rd_targblock, rd_fsm_nblocks, and rd_vm_nblocks from relcache to the smgr
relation entries, so that they will get reset to InvalidBlockNumber whenever
an smgr-level flush happens.  Because we now send smgr invalidation messages
immediately (not at end of transaction) when a relation truncation occurs,
this ensures that other backends will reset their values before they next
access the relation.  We no longer need the unreliable assumption that a
VACUUM that's doing a truncation will hold its AccessExclusive lock until
commit --- in fact, we can intentionally release that lock as soon as we've
completed the truncation.  This patch therefore reverts (most of) Alvaro's
patch of 2009-11-10, as well as my marginal hacking on it yesterday.  We can
also get rid of assorted no-longer-needed relcache flushes, which are far more
expensive than an smgr flush because they kill a lot more state.
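For illustration, the change amounts to caching these block numbers in the smgr relation entry rather than in the relcache entry, with relcache-level code going through accessor macros. A minimal sketch of the idea follows; the names RelationGetTargetBlock, RelationSetTargetBlock, smgr_targblock, and smgr_vm_nblocks appear in the diff below, while the struct layout, the smgr_fsm_nblocks field name, and the macro bodies here are an approximation rather than the committed code:

/* Sketch: block-number caches kept per smgr entry, not per relcache entry */
typedef struct SMgrRelationData
{
	/* ... existing smgr fields ... */
	BlockNumber	smgr_targblock;		/* current insertion target block */
	BlockNumber	smgr_fsm_nblocks;	/* last known size of the FSM fork */
	BlockNumber	smgr_vm_nblocks;	/* last known size of the VM fork */
} SMgrRelationData;

/* Relcache-level accessors; they go through rd_smgr on demand */
#define RelationGetTargetBlock(relation) \
	((relation)->rd_smgr != NULL ? \
	 (relation)->rd_smgr->smgr_targblock : InvalidBlockNumber)

#define RelationSetTargetBlock(relation, targblock) \
	do { \
		RelationOpenSmgr(relation); \
		(relation)->rd_smgr->smgr_targblock = (targblock); \
	} while (0)

Because an smgr-level flush resets the cached values, every backend that processes the immediate invalidation message falls back to InvalidBlockNumber and re-reads the true fork sizes on its next access.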

In passing this patch fixes smgr_redo's failure to perform visibility-map
truncation, and cleans up some rather dubious assumptions in freespace.c and
visibilitymap.c about when rd_fsm_nblocks and rd_vm_nblocks can be out of
date.
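The smgr_redo part of the fix means that replay of a truncation record now shortens the visibility-map fork along with the free-space-map fork. A rough sketch of the shape of that redo step, assuming the fake-relcache-entry machinery and the smgrtruncate signature of this era (seen in the diff below); this is an approximation, not the committed hunk:

/* Sketch of XLOG_SMGR_TRUNCATE replay after the fix */
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
SMgrRelation reln = smgropen(xlrec->rnode);
Relation	rel;

/* Truncate the main fork (recreating the relation first if needed) */
smgrcreate(reln, MAIN_FORKNUM, true);
smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno, false);

/* FSM and VM are truncated through a fake relcache entry */
rel = CreateFakeRelcacheEntry(xlrec->rnode);

if (smgrexists(reln, FSM_FORKNUM))
	FreeSpaceMapTruncateRel(rel, xlrec->blkno);
if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
	visibilitymap_truncate(rel, xlrec->blkno);	/* the previously missing step */

FreeFakeRelcacheEntry(rel);
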
Tom Lane
2010-02-09 21:43:30 +00:00
parent 79647eed86
commit cbe9d6beb4
15 changed files with 210 additions and 219 deletions

src/backend/access/heap/hio.c

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.77 2010/01/02 16:57:34 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.78 2010/02/09 21:43:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,7 @@
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
/*
@@ -126,7 +127,7 @@ ReadBufferBI(Relation relation, BlockNumber targetBlock,
*
* HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
* relation, if the caller holds exclusive lock and is careful to invalidate
* relation->rd_targblock before the first insertion --- that ensures that
* relation's smgr_targblock before the first insertion --- that ensures that
* all insertions will occur into newly added pages and not be intermixed
* with tuples from other transactions. That way, a crash can't risk losing
* any committed data of other transactions. (See heap_insert's comments
@@ -206,7 +207,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
else if (bistate && bistate->current_buf != InvalidBuffer)
targetBlock = BufferGetBlockNumber(bistate->current_buf);
else
targetBlock = relation->rd_targblock;
targetBlock = RelationGetTargetBlock(relation);
if (targetBlock == InvalidBlockNumber && use_fsm)
{
@@ -273,7 +274,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
if (len + saveFreeSpace <= pageFreeSpace)
{
/* use this page as future insert target, too */
relation->rd_targblock = targetBlock;
RelationSetTargetBlock(relation, targetBlock);
return buffer;
}
@@ -377,7 +378,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* current backend to make more insertions or not, which is probably a
* good bet most of the time. So for now, don't add it to FSM yet.
*/
relation->rd_targblock = BufferGetBlockNumber(buffer);
RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
return buffer;
}

src/backend/access/heap/visibilitymap.c

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.7 2010/01/02 16:57:35 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.8 2010/02/09 21:43:29 tgl Exp $
*
* INTERFACE ROUTINES
* visibilitymap_clear - clear a bit in the visibility map
@@ -94,7 +94,7 @@
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/inval.h"
/*#define TRACE_VISIBILITYMAP */
@@ -291,7 +291,13 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
}
/*
* visibilitymap_test - truncate the visibility map
* visibilitymap_truncate - truncate the visibility map
*
* The caller must hold AccessExclusiveLock on the relation, to ensure that
* other backends receive the smgr invalidation event that this function sends
* before they access the VM again.
*
* nheapblocks is the new size of the heap.
*/
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
@@ -307,6 +313,8 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
#endif
RelationOpenSmgr(rel);
/*
* If no visibility map has been created yet for this relation, there's
* nothing to truncate.
@@ -358,23 +366,25 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
else
newnblocks = truncBlock;
if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) < newnblocks)
if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
{
/* nothing to do, the file was already smaller than requested size */
return;
}
/* Truncate the unused VM pages, and send smgr inval message */
smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks,
rel->rd_istemp);
/*
* Need to invalidate the relcache entry, because rd_vm_nblocks seen by
* other backends is no longer valid.
* We might as well update the local smgr_vm_nblocks setting.
* smgrtruncate sent an smgr cache inval message, which will cause
* other backends to invalidate their copy of smgr_vm_nblocks, and
* this one too at the next command boundary. But this ensures it
* isn't outright wrong until then.
*/
if (!InRecovery)
CacheInvalidateRelcache(rel);
rel->rd_vm_nblocks = newnblocks;
if (rel->rd_smgr)
rel->rd_smgr->smgr_vm_nblocks = newnblocks;
}
/*
@@ -391,21 +401,23 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
RelationOpenSmgr(rel);
/*
* The current size of the visibility map fork is kept in relcache, to
* avoid reading beyond EOF. If we haven't cached the size of the map yet,
* do that first.
* If we haven't cached the size of the visibility map fork yet, check it
* first. Also recheck if the requested block seems to be past end, since
* our cached value might be stale. (We send smgr inval messages on
* truncation, but not on extension.)
*/
if (rel->rd_vm_nblocks == InvalidBlockNumber)
if (rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber ||
blkno >= rel->rd_smgr->smgr_vm_nblocks)
{
if (smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
rel->rd_vm_nblocks = smgrnblocks(rel->rd_smgr,
VISIBILITYMAP_FORKNUM);
rel->rd_smgr->smgr_vm_nblocks = smgrnblocks(rel->rd_smgr,
VISIBILITYMAP_FORKNUM);
else
rel->rd_vm_nblocks = 0;
rel->rd_smgr->smgr_vm_nblocks = 0;
}
/* Handle requests beyond EOF */
if (blkno >= rel->rd_vm_nblocks)
if (blkno >= rel->rd_smgr->smgr_vm_nblocks)
{
if (extend)
vm_extend(rel, blkno + 1);
@@ -446,19 +458,23 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
* separate lock tag type for it.
*
* Note that another backend might have extended or created the relation
* before we get the lock.
* by the time we get the lock.
*/
LockRelationForExtension(rel, ExclusiveLock);
/* Create the file first if it doesn't exist */
if ((rel->rd_vm_nblocks == 0 || rel->rd_vm_nblocks == InvalidBlockNumber)
&& !smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
{
/* Might have to re-open if a cache flush happened */
RelationOpenSmgr(rel);
/*
* Create the file first if it doesn't exist. If smgr_vm_nblocks
* is positive then it must exist, no need for an smgrexists call.
*/
if ((rel->rd_smgr->smgr_vm_nblocks == 0 ||
rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) &&
!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false);
vm_nblocks_now = 0;
}
else
vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
while (vm_nblocks_now < vm_nblocks)
{
@@ -467,12 +483,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
vm_nblocks_now++;
}
/* Update local cache with the up-to-date size */
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
UnlockRelationForExtension(rel, ExclusiveLock);
pfree(pg);
/* Update the relcache with the up-to-date size */
if (!InRecovery)
CacheInvalidateRelcache(rel);
rel->rd_vm_nblocks = vm_nblocks_now;
}