mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Fix up rickety handling of relation-truncation interlocks.
Move rd_targblock, rd_fsm_nblocks, and rd_vm_nblocks from relcache to the smgr relation entries, so that they will get reset to InvalidBlockNumber whenever an smgr-level flush happens. Because we now send smgr invalidation messages immediately (not at end of transaction) when a relation truncation occurs, this ensures that other backends will reset their values before they next access the relation. We no longer need the unreliable assumption that a VACUUM that's doing a truncation will hold its AccessExclusive lock until commit --- in fact, we can intentionally release that lock as soon as we've completed the truncation. This patch therefore reverts (most of) Alvaro's patch of 2009-11-10, as well as my marginal hacking on it yesterday. We can also get rid of assorted no-longer-needed relcache flushes, which are far more expensive than an smgr flush because they kill a lot more state. In passing this patch fixes smgr_redo's failure to perform visibility-map truncation, and cleans up some rather dubious assumptions in freespace.c and visibilitymap.c about when rd_fsm_nblocks and rd_vm_nblocks can be out of date.
This commit is contained in:
@@ -11,7 +11,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.199 2010/02/07 22:40:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.200 2010/02/09 21:43:30 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "miscadmin.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/procarray.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/acl.h"
|
||||
#include "utils/fmgroids.h"
|
||||
#include "utils/inval.h"
|
||||
@@ -559,16 +560,12 @@ mark_index_clustered(Relation rel, Oid indexOid)
|
||||
indexForm->indisclustered = false;
|
||||
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
|
||||
CatalogUpdateIndexes(pg_index, indexTuple);
|
||||
/* Ensure we see the update in the index's relcache entry */
|
||||
CacheInvalidateRelcacheByRelid(thisIndexOid);
|
||||
}
|
||||
else if (thisIndexOid == indexOid)
|
||||
{
|
||||
indexForm->indisclustered = true;
|
||||
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
|
||||
CatalogUpdateIndexes(pg_index, indexTuple);
|
||||
/* Ensure we see the update in the index's relcache entry */
|
||||
CacheInvalidateRelcacheByRelid(thisIndexOid);
|
||||
}
|
||||
heap_freetuple(indexTuple);
|
||||
}
|
||||
@@ -819,8 +816,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
|
||||
XLogReportUnloggedStatement(reason);
|
||||
}
|
||||
|
||||
/* use_wal off requires rd_targblock be initially invalid */
|
||||
Assert(NewHeap->rd_targblock == InvalidBlockNumber);
|
||||
/* use_wal off requires smgr_targblock be initially invalid */
|
||||
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
|
||||
|
||||
/*
|
||||
* If both tables have TOAST tables, perform toast swap by content. It is
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.164 2010/01/02 16:57:37 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.165 2010/02/09 21:43:30 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "storage/proc.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/acl.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
@@ -220,7 +221,7 @@ DefineSequence(CreateSeqStmt *seq)
|
||||
sm->magic = SEQ_MAGIC;
|
||||
|
||||
/* hack: ensure heap_insert will insert on the just-created page */
|
||||
rel->rd_targblock = 0;
|
||||
RelationSetTargetBlock(rel, 0);
|
||||
|
||||
/* Now form & insert sequence tuple */
|
||||
tuple = heap_form_tuple(tupDesc, value, null);
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.406 2010/02/08 16:50:21 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.407 2010/02/09 21:43:30 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -789,7 +789,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
Oid save_userid;
|
||||
int save_sec_context;
|
||||
int save_nestlevel;
|
||||
bool heldoff;
|
||||
|
||||
if (scanned_all)
|
||||
*scanned_all = false;
|
||||
@@ -970,10 +969,9 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
cluster_rel(relid, InvalidOid, false,
|
||||
(vacstmt->options & VACOPT_VERBOSE) != 0,
|
||||
vacstmt->freeze_min_age, vacstmt->freeze_table_age);
|
||||
heldoff = false;
|
||||
}
|
||||
else
|
||||
heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
|
||||
lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
|
||||
|
||||
/* Roll back any GUC changes executed by index functions */
|
||||
AtEOXact_GUC(false, save_nestlevel);
|
||||
@@ -991,10 +989,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
PopActiveSnapshot();
|
||||
CommitTransactionCommand();
|
||||
|
||||
/* now we can allow interrupts again, if disabled */
|
||||
if (heldoff)
|
||||
RESUME_INTERRUPTS();
|
||||
|
||||
/*
|
||||
* If the relation has a secondary toast rel, vacuum that too while we
|
||||
* still hold the session lock on the master table. Note however that
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.130 2010/02/09 00:28:30 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.131 2010/02/09 21:43:30 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -50,7 +50,6 @@
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/freespace.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/inval.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/pg_rusage.h"
|
||||
@@ -123,7 +122,7 @@ static void lazy_cleanup_index(Relation indrel,
|
||||
LVRelStats *vacrelstats);
|
||||
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
|
||||
int tupindex, LVRelStats *vacrelstats);
|
||||
static bool lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
|
||||
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
|
||||
static BlockNumber count_nondeletable_pages(Relation onerel,
|
||||
LVRelStats *vacrelstats);
|
||||
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
|
||||
@@ -141,11 +140,8 @@ static int vac_cmp_itemptr(const void *left, const void *right);
|
||||
*
|
||||
* At entry, we have already established a transaction and opened
|
||||
* and locked the relation.
|
||||
*
|
||||
* The return value indicates whether this function has held off
|
||||
* interrupts -- if true, caller must RESUME_INTERRUPTS() after commit.
|
||||
*/
|
||||
bool
|
||||
void
|
||||
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
BufferAccessStrategy bstrategy, bool *scanned_all)
|
||||
{
|
||||
@@ -157,7 +153,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
TimestampTz starttime = 0;
|
||||
bool scan_all;
|
||||
TransactionId freezeTableLimit;
|
||||
bool heldoff = false;
|
||||
|
||||
pg_rusage_init(&ru0);
|
||||
|
||||
@@ -194,16 +189,9 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
/* Done with indexes */
|
||||
vac_close_indexes(nindexes, Irel, NoLock);
|
||||
|
||||
/* Vacuum the Free Space Map */
|
||||
FreeSpaceMapVacuum(onerel);
|
||||
|
||||
/*
|
||||
* Optionally truncate the relation.
|
||||
*
|
||||
* NB: there should be as little code as possible after this point,
|
||||
* to minimize the chance of failure as well as the time spent ignoring
|
||||
* cancel/die interrupts.
|
||||
*
|
||||
* Don't even think about it unless we have a shot at releasing a goodly
|
||||
* number of pages. Otherwise, the time taken isn't worth it.
|
||||
*/
|
||||
@@ -211,7 +199,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
if (possibly_freeable > 0 &&
|
||||
(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
|
||||
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
|
||||
heldoff = lazy_truncate_heap(onerel, vacrelstats);
|
||||
lazy_truncate_heap(onerel, vacrelstats);
|
||||
|
||||
/* Vacuum the Free Space Map */
|
||||
FreeSpaceMapVacuum(onerel);
|
||||
|
||||
/*
|
||||
* Update statistics in pg_class. But only if we didn't skip any pages;
|
||||
@@ -255,8 +246,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
|
||||
if (scanned_all)
|
||||
*scanned_all = vacrelstats->scanned_all;
|
||||
|
||||
return heldoff;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -996,11 +985,8 @@ lazy_cleanup_index(Relation indrel,
|
||||
|
||||
/*
|
||||
* lazy_truncate_heap - try to truncate off any empty pages at the end
|
||||
*
|
||||
* The return value indicates whether this function has held off
|
||||
* interrupts -- if true, caller must RESUME_INTERRUPTS() after commit.
|
||||
*/
|
||||
static bool
|
||||
static void
|
||||
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
{
|
||||
BlockNumber old_rel_pages = vacrelstats->rel_pages;
|
||||
@@ -1016,7 +1002,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
* possible considering we already hold a lower-grade lock).
|
||||
*/
|
||||
if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
|
||||
return false;
|
||||
return;
|
||||
|
||||
/*
|
||||
* Now that we have exclusive lock, look to see if the rel has grown
|
||||
@@ -1029,7 +1015,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
/* might as well use the latest news when we update pg_class stats */
|
||||
vacrelstats->rel_pages = new_rel_pages;
|
||||
UnlockRelation(onerel, AccessExclusiveLock);
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1044,35 +1030,23 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
{
|
||||
/* can't do anything after all */
|
||||
UnlockRelation(onerel, AccessExclusiveLock);
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prevent cancel/die interrupts from now till commit. Once we have
|
||||
* truncated, it is essential that we send the sinval message before
|
||||
* releasing exclusive lock on the relation; both of which will
|
||||
* happen during commit. Other backends must receive the sinval
|
||||
* message to reset their rd_targblock values before they can safely
|
||||
* write to the table again. While we can't positively guarantee
|
||||
* no error before commit, we can at least prevent cancel interrupts.
|
||||
*
|
||||
* XXX it would be better if we had a way to send the inval message
|
||||
* nontransactionally; an error after the truncate will mean that the
|
||||
* message is lost. Note however that turning this all into a critical
|
||||
* section would not be an improvement. Making it critical would mean
|
||||
* that an error forces PANIC, whereas losing the sinval will at worst
|
||||
* cause unexpected nonfatal errors in other sessions.
|
||||
*/
|
||||
HOLD_INTERRUPTS();
|
||||
|
||||
/* force relcache inval so all backends reset their rd_targblock */
|
||||
CacheInvalidateRelcache(onerel);
|
||||
|
||||
/*
|
||||
* Okay to truncate. Do as little as possible between here and commit.
|
||||
* Okay to truncate.
|
||||
*/
|
||||
RelationTruncate(onerel, new_rel_pages);
|
||||
|
||||
/*
|
||||
* We can release the exclusive lock as soon as we have truncated. Other
|
||||
* backends can't safely access the relation until they have processed the
|
||||
* smgr invalidation that smgrtruncate sent out ... but that should happen
|
||||
* as part of standard invalidation processing once they acquire lock on
|
||||
* the relation.
|
||||
*/
|
||||
UnlockRelation(onerel, AccessExclusiveLock);
|
||||
|
||||
/* update statistics */
|
||||
vacrelstats->rel_pages = new_rel_pages;
|
||||
vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
|
||||
@@ -1083,8 +1057,6 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
old_rel_pages, new_rel_pages),
|
||||
errdetail("%s.",
|
||||
pg_rusage_show(&ru0))));
|
||||
|
||||
return true; /* interrupts are held off */
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user