mirror of
https://github.com/postgres/postgres.git
synced 2025-06-16 06:01:02 +03:00
Extend yesterday's patch so that the bgwriter is also told to forget
pending fsyncs during DROP DATABASE. Obviously necessary in hindsight :-(
This commit is contained in:
@ -13,7 +13,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.189 2007/01/16 13:28:56 alvherre Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.190 2007/01/17 16:25:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -40,6 +40,7 @@
|
|||||||
#include "postmaster/bgwriter.h"
|
#include "postmaster/bgwriter.h"
|
||||||
#include "storage/freespace.h"
|
#include "storage/freespace.h"
|
||||||
#include "storage/procarray.h"
|
#include "storage/procarray.h"
|
||||||
|
#include "storage/smgr.h"
|
||||||
#include "utils/acl.h"
|
#include "utils/acl.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/flatfiles.h"
|
#include "utils/flatfiles.h"
|
||||||
@ -643,6 +644,12 @@ dropdb(const char *dbname, bool missing_ok)
|
|||||||
*/
|
*/
|
||||||
FreeSpaceMapForgetDatabase(db_id);
|
FreeSpaceMapForgetDatabase(db_id);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tell bgwriter to forget any pending fsync requests for files in the
|
||||||
|
* database; else it'll fail at next checkpoint.
|
||||||
|
*/
|
||||||
|
ForgetDatabaseFsyncRequests(db_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On Windows, force a checkpoint so that the bgwriter doesn't hold any
|
* On Windows, force a checkpoint so that the bgwriter doesn't hold any
|
||||||
* open files, which would cause rmdir() to fail.
|
* open files, which would cause rmdir() to fail.
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.35 2007/01/17 00:17:20 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.36 2007/01/17 16:25:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -103,7 +103,7 @@
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
RelFileNode rnode;
|
RelFileNode rnode;
|
||||||
BlockNumber segno; /* InvalidBlockNumber means "revoke" */
|
BlockNumber segno; /* see md.c for special values */
|
||||||
/* might add a real request-type field later; not needed yet */
|
/* might add a real request-type field later; not needed yet */
|
||||||
} BgWriterRequest;
|
} BgWriterRequest;
|
||||||
|
|
||||||
@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime)
|
|||||||
* ForwardFsyncRequest
|
* ForwardFsyncRequest
|
||||||
* Forward a file-fsync request from a backend to the bgwriter
|
* Forward a file-fsync request from a backend to the bgwriter
|
||||||
*
|
*
|
||||||
* segno specifies which segment (not block!) of the relation needs to be
|
|
||||||
* fsync'd. If segno == InvalidBlockNumber, the meaning is to revoke any
|
|
||||||
* pending fsync requests for the entire relation (this message is sent
|
|
||||||
* when the relation is about to be deleted).
|
|
||||||
*
|
|
||||||
* Whenever a backend is compelled to write directly to a relation
|
* Whenever a backend is compelled to write directly to a relation
|
||||||
* (which should be seldom, if the bgwriter is getting its job done),
|
* (which should be seldom, if the bgwriter is getting its job done),
|
||||||
* the backend calls this routine to pass over knowledge that the relation
|
* the backend calls this routine to pass over knowledge that the relation
|
||||||
* is dirty and must be fsync'd before next checkpoint.
|
* is dirty and must be fsync'd before next checkpoint.
|
||||||
*
|
*
|
||||||
|
* segno specifies which segment (not block!) of the relation needs to be
|
||||||
|
* fsync'd. (Since the valid range is much less than BlockNumber, we can
|
||||||
|
* use high values for special flags; that's all internal to md.c, which
|
||||||
|
* see for details.)
|
||||||
|
*
|
||||||
* If we are unable to pass over the request (at present, this can happen
|
* If we are unable to pass over the request (at present, this can happen
|
||||||
* if the shared memory queue is full), we return false. That forces
|
* if the shared memory queue is full), we return false. That forces
|
||||||
* the backend to do its own fsync. We hope that will be even more seldom.
|
* the backend to do its own fsync. We hope that will be even more seldom.
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.126 2007/01/17 00:17:21 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.127 2007/01/17 16:25:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -31,6 +31,10 @@
|
|||||||
/* interval for calling AbsorbFsyncRequests in mdsync */
|
/* interval for calling AbsorbFsyncRequests in mdsync */
|
||||||
#define FSYNCS_PER_ABSORB 10
|
#define FSYNCS_PER_ABSORB 10
|
||||||
|
|
||||||
|
/* special values for the segno arg to RememberFsyncRequest */
|
||||||
|
#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
|
||||||
|
#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On Windows, we have to interpret EACCES as possibly meaning the same as
|
* On Windows, we have to interpret EACCES as possibly meaning the same as
|
||||||
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
|
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
|
||||||
@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
|||||||
* We have to clean out any pending fsync requests for the doomed relation,
|
* We have to clean out any pending fsync requests for the doomed relation,
|
||||||
* else the next mdsync() will fail.
|
* else the next mdsync() will fail.
|
||||||
*/
|
*/
|
||||||
if (pendingOpsTable)
|
ForgetRelationFsyncRequests(rnode);
|
||||||
{
|
|
||||||
/* standalone backend or startup process: fsync state is local */
|
|
||||||
RememberFsyncRequest(rnode, InvalidBlockNumber);
|
|
||||||
}
|
|
||||||
else if (IsUnderPostmaster)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Notify the bgwriter about it. If we fail to queue the revoke
|
|
||||||
* message, we have to sleep and try again ... ugly, but hopefully
|
|
||||||
* won't happen often.
|
|
||||||
*
|
|
||||||
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
|
|
||||||
* an error would leave the no-longer-used file still present on
|
|
||||||
* disk, which would be bad, so I'm inclined to assume that the
|
|
||||||
* bgwriter will always empty the queue soon.
|
|
||||||
*/
|
|
||||||
while (!ForwardFsyncRequest(rnode, InvalidBlockNumber))
|
|
||||||
pg_usleep(10000L); /* 10 msec seems a good number */
|
|
||||||
/*
|
|
||||||
* Note we don't wait for the bgwriter to actually absorb the
|
|
||||||
* revoke message; see mdsync() for the implications.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
path = relpath(rnode);
|
path = relpath(rnode);
|
||||||
|
|
||||||
@ -894,7 +875,8 @@ mdsync(void)
|
|||||||
* what we will do is retry the whole process after absorbing fsync
|
* what we will do is retry the whole process after absorbing fsync
|
||||||
* request messages again. Since mdunlink() queues a "revoke" message
|
* request messages again. Since mdunlink() queues a "revoke" message
|
||||||
* before actually unlinking, the fsync request is guaranteed to be gone
|
* before actually unlinking, the fsync request is guaranteed to be gone
|
||||||
* the second time if it really was this case.
|
* the second time if it really was this case. DROP DATABASE likewise
|
||||||
|
* has to tell us to forget fsync requests before it starts deletions.
|
||||||
*/
|
*/
|
||||||
do {
|
do {
|
||||||
HASH_SEQ_STATUS hstat;
|
HASH_SEQ_STATUS hstat;
|
||||||
@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
|
|||||||
* We stuff the fsync request into the local hash table for execution
|
* We stuff the fsync request into the local hash table for execution
|
||||||
* during the bgwriter's next checkpoint.
|
* during the bgwriter's next checkpoint.
|
||||||
*
|
*
|
||||||
* segno == InvalidBlockNumber is a "revoke" request: remove any pending
|
* The range of possible segment numbers is way less than the range of
|
||||||
* fsync requests for the whole relation.
|
* BlockNumber, so we can reserve high values of segno for special purposes.
|
||||||
|
* We define two: FORGET_RELATION_FSYNC means to drop pending fsyncs for
|
||||||
|
* a relation, and FORGET_DATABASE_FSYNC means to drop pending fsyncs for
|
||||||
|
* a whole database. (These are a tad slow because the hash table has to be
|
||||||
|
* searched linearly, but it doesn't seem worth rethinking the table structure
|
||||||
|
* for them.)
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||||
{
|
{
|
||||||
Assert(pendingOpsTable);
|
Assert(pendingOpsTable);
|
||||||
|
|
||||||
if (segno != InvalidBlockNumber)
|
if (segno == FORGET_RELATION_FSYNC)
|
||||||
{
|
{
|
||||||
/* Enter a request to fsync this segment */
|
/* Remove any pending requests for the entire relation */
|
||||||
|
HASH_SEQ_STATUS hstat;
|
||||||
|
PendingOperationEntry *entry;
|
||||||
|
|
||||||
|
hash_seq_init(&hstat, pendingOpsTable);
|
||||||
|
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
||||||
|
{
|
||||||
|
if (RelFileNodeEquals(entry->tag.rnode, rnode))
|
||||||
|
{
|
||||||
|
/* Okay, delete this entry */
|
||||||
|
if (hash_search(pendingOpsTable, &entry->tag,
|
||||||
|
HASH_REMOVE, NULL) == NULL)
|
||||||
|
elog(ERROR, "pendingOpsTable corrupted");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (segno == FORGET_DATABASE_FSYNC)
|
||||||
|
{
|
||||||
|
/* Remove any pending requests for the entire database */
|
||||||
|
HASH_SEQ_STATUS hstat;
|
||||||
|
PendingOperationEntry *entry;
|
||||||
|
|
||||||
|
hash_seq_init(&hstat, pendingOpsTable);
|
||||||
|
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
||||||
|
{
|
||||||
|
if (entry->tag.rnode.dbNode == rnode.dbNode)
|
||||||
|
{
|
||||||
|
/* Okay, delete this entry */
|
||||||
|
if (hash_search(pendingOpsTable, &entry->tag,
|
||||||
|
HASH_REMOVE, NULL) == NULL)
|
||||||
|
elog(ERROR, "pendingOpsTable corrupted");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Normal case: enter a request to fsync this segment */
|
||||||
PendingOperationTag key;
|
PendingOperationTag key;
|
||||||
PendingOperationEntry *entry;
|
PendingOperationEntry *entry;
|
||||||
bool found;
|
bool found;
|
||||||
@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
|||||||
if (!found) /* new entry, so initialize it */
|
if (!found) /* new entry, so initialize it */
|
||||||
entry->failures = 0;
|
entry->failures = 0;
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ForgetRelationFsyncRequests(RelFileNode rnode)
|
||||||
|
{
|
||||||
|
if (pendingOpsTable)
|
||||||
|
{
|
||||||
|
/* standalone backend or startup process: fsync state is local */
|
||||||
|
RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
|
||||||
|
}
|
||||||
|
else if (IsUnderPostmaster)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Remove any pending requests for the entire relation. (This is a
|
* Notify the bgwriter about it. If we fail to queue the revoke
|
||||||
* tad slow but it doesn't seem worth rethinking the table structure.)
|
* message, we have to sleep and try again ... ugly, but hopefully
|
||||||
|
* won't happen often.
|
||||||
|
*
|
||||||
|
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
|
||||||
|
* an error would leave the no-longer-used file still present on
|
||||||
|
* disk, which would be bad, so I'm inclined to assume that the
|
||||||
|
* bgwriter will always empty the queue soon.
|
||||||
|
*/
|
||||||
|
while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
|
||||||
|
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||||
|
/*
|
||||||
|
* Note we don't wait for the bgwriter to actually absorb the
|
||||||
|
* revoke message; see mdsync() for the implications.
|
||||||
*/
|
*/
|
||||||
HASH_SEQ_STATUS hstat;
|
|
||||||
PendingOperationEntry *entry;
|
|
||||||
|
|
||||||
hash_seq_init(&hstat, pendingOpsTable);
|
|
||||||
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
|
||||||
{
|
|
||||||
if (RelFileNodeEquals(entry->tag.rnode, rnode))
|
|
||||||
{
|
|
||||||
/* Okay, delete this entry */
|
|
||||||
if (hash_search(pendingOpsTable, &entry->tag,
|
|
||||||
HASH_REMOVE, NULL) == NULL)
|
|
||||||
elog(ERROR, "pendingOpsTable corrupted");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ForgetDatabaseFsyncRequests(Oid dbid)
|
||||||
|
{
|
||||||
|
RelFileNode rnode;
|
||||||
|
|
||||||
|
rnode.dbNode = dbid;
|
||||||
|
rnode.spcNode = 0;
|
||||||
|
rnode.relNode = 0;
|
||||||
|
|
||||||
|
if (pendingOpsTable)
|
||||||
|
{
|
||||||
|
/* standalone backend or startup process: fsync state is local */
|
||||||
|
RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
|
||||||
|
}
|
||||||
|
else if (IsUnderPostmaster)
|
||||||
|
{
|
||||||
|
/* see notes in ForgetRelationFsyncRequests */
|
||||||
|
while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
|
||||||
|
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* _fdvec_alloc() -- Make a MdfdVec object.
|
* _fdvec_alloc() -- Make a MdfdVec object.
|
||||||
*/
|
*/
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.57 2007/01/05 22:19:58 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln);
|
|||||||
extern void mdsync(void);
|
extern void mdsync(void);
|
||||||
|
|
||||||
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
|
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
|
||||||
|
extern void ForgetRelationFsyncRequests(RelFileNode rnode);
|
||||||
|
extern void ForgetDatabaseFsyncRequests(Oid dbid);
|
||||||
|
|
||||||
/* smgrtype.c */
|
/* smgrtype.c */
|
||||||
extern Datum smgrout(PG_FUNCTION_ARGS);
|
extern Datum smgrout(PG_FUNCTION_ARGS);
|
||||||
|
Reference in New Issue
Block a user