mirror of
https://github.com/postgres/postgres.git
synced 2025-07-07 00:36:50 +03:00
Fix longstanding problems in VACUUM caused by untimely interruptions
In VACUUM FULL, an interrupt after the initial transaction has been recorded as committed can cause postmaster to restart with the following error message:

    PANIC: cannot abort transaction NNNN, it was already committed

This problem has been reported many times.

In lazy VACUUM, an interrupt after the table has been truncated by lazy_truncate_heap causes other backends' relcache to still point to the removed pages; this can cause future INSERT and UPDATE queries to error out with the following error message:

    could not read block XX of relation 1663/NNN/MMMM: read only 0 of 8192 bytes

The window for this race condition is extremely narrow, but it has been seen in the wild involving a cancelled autovacuum process.

The solution for both problems is to inhibit interrupts in both operations until after the respective transactions have been committed. It's not a complete solution, because the transaction could theoretically be aborted by some other error, but it at least fixes the most common causes of both problems.
This commit is contained in:
@ -29,7 +29,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.122 2009/08/24 02:18:32 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.123 2009/11/10 18:00:06 alvherre Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -140,8 +140,11 @@ static int vac_cmp_itemptr(const void *left, const void *right);
|
||||
*
|
||||
* At entry, we have already established a transaction and opened
|
||||
* and locked the relation.
|
||||
*
|
||||
* The return value indicates whether this function has held off
|
||||
* interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
|
||||
*/
|
||||
void
|
||||
bool
|
||||
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
BufferAccessStrategy bstrategy, bool *scanned_all)
|
||||
{
|
||||
@ -153,6 +156,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
TimestampTz starttime = 0;
|
||||
bool scan_all;
|
||||
TransactionId freezeTableLimit;
|
||||
bool heldoff = false;
|
||||
|
||||
pg_rusage_init(&ru0);
|
||||
|
||||
@ -194,12 +198,22 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
*
|
||||
* Don't even think about it unless we have a shot at releasing a goodly
|
||||
* number of pages. Otherwise, the time taken isn't worth it.
|
||||
*
|
||||
* Note that after we've truncated the heap, it's too late to abort the
|
||||
* transaction; doing so would lose the sinval messages needed to tell
|
||||
* the other backends about the table being shrunk. We prevent interrupts
|
||||
* in that case; caller is responsible for re-enabling them after
|
||||
* committing the transaction.
|
||||
*/
|
||||
possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
|
||||
if (possibly_freeable > 0 &&
|
||||
(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
|
||||
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
|
||||
{
|
||||
HOLD_INTERRUPTS();
|
||||
heldoff = true;
|
||||
lazy_truncate_heap(onerel, vacrelstats);
|
||||
}
|
||||
|
||||
/* Vacuum the Free Space Map */
|
||||
FreeSpaceMapVacuum(onerel);
|
||||
@ -246,6 +260,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
|
||||
if (scanned_all)
|
||||
*scanned_all = vacrelstats->scanned_all;
|
||||
|
||||
return heldoff;
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user