mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
2555 lines
65 KiB
C
2555 lines
65 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* bufmgr.c
|
|
* buffer manager interface routines
|
|
*
|
|
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.85 2000/09/29 03:55:45 inoue Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
/*
|
|
*
|
|
* BufferAlloc() -- lookup a buffer in the buffer table. If
|
|
* it isn't there add it, but do not read data into memory.
|
|
* This is used when we are about to reinitialize the
|
|
* buffer so don't care what the current disk contents are.
|
|
* BufferAlloc() also pins the new buffer in memory.
|
|
*
|
|
* ReadBuffer() -- like BufferAlloc() but reads the data
|
|
* on a buffer cache miss.
|
|
*
|
|
* ReleaseBuffer() -- unpin the buffer
|
|
*
|
|
* WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
|
|
* but don't unpin. The disk IO is delayed until buffer
|
|
* replacement.
|
|
*
|
|
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
|
|
*
|
|
* FlushBuffer() -- Write buffer immediately. Can unpin, or not,
|
|
* depending on parameter.
|
|
*
|
|
* BufferSync() -- flush all dirty buffers in the buffer pool.
|
|
*
|
|
* InitBufferPool() -- Init the buffer module.
|
|
*
|
|
* See other files:
|
|
* freelist.c -- chooses victim for buffer replacement
|
|
* buf_table.c -- manages the buffer lookup table
|
|
*/
|
|
#include <sys/types.h>
|
|
#include <sys/file.h>
|
|
#include <math.h>
|
|
#include <signal.h>
|
|
|
|
#include "postgres.h"
|
|
#include "executor/execdebug.h"
|
|
#include "miscadmin.h"
|
|
#include "storage/s_lock.h"
|
|
#include "storage/smgr.h"
|
|
#include "utils/relcache.h"
|
|
|
|
extern SPINLOCK BufMgrLock;
|
|
extern long int ReadBufferCount;
|
|
extern long int ReadLocalBufferCount;
|
|
extern long int BufferHitCount;
|
|
extern long int LocalBufferHitCount;
|
|
extern long int BufferFlushCount;
|
|
extern long int LocalBufferFlushCount;
|
|
|
|
/*
|
|
* It's used to avoid disk writes for read-only transactions
|
|
* (i.e. when no one shared buffer was changed by transaction).
|
|
* We set it to true in WriteBuffer/WriteNoReleaseBuffer when
|
|
* marking shared buffer as dirty. We set it to false in xact.c
|
|
* after transaction is committed/aborted.
|
|
*/
|
|
bool SharedBufferChanged = false;
|
|
|
|
static void WaitIO(BufferDesc *buf, SPINLOCK spinlock);
|
|
static void StartBufferIO(BufferDesc *buf, bool forInput);
|
|
static void TerminateBufferIO(BufferDesc *buf);
|
|
static void ContinueBufferIO(BufferDesc *buf, bool forInput);
|
|
extern void AbortBufferIO(void);
|
|
|
|
/*
|
|
* Macro : BUFFER_IS_BROKEN
|
|
* Note that write error doesn't mean the buffer broken
|
|
*/
|
|
#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
|
|
|
|
#ifndef HAS_TEST_AND_SET
|
|
static void SignalIO(BufferDesc *buf);
|
|
extern long *NWaitIOBackendP; /* defined in buf_init.c */
|
|
|
|
#endif /* HAS_TEST_AND_SET */
|
|
|
|
static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
|
|
bool bufferLockHeld);
|
|
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
|
|
bool *foundPtr, bool bufferLockHeld);
|
|
static void SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
|
|
static void ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
|
|
static void BufferSync(void);
|
|
static int BufferReplace(BufferDesc *bufHdr);
|
|
void PrintBufferDescs(void);
|
|
|
|
/* ---------------------------------------------------
|
|
* RelationGetBufferWithBuffer
|
|
* see if the given buffer is what we want
|
|
* if yes, we don't need to bother the buffer manager
|
|
* ---------------------------------------------------
|
|
*/
|
|
Buffer
|
|
RelationGetBufferWithBuffer(Relation relation,
|
|
BlockNumber blockNumber,
|
|
Buffer buffer)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
|
|
if (BufferIsValid(buffer))
|
|
{
|
|
if (!BufferIsLocal(buffer))
|
|
{
|
|
LockRelId *lrelId = &relation->rd_lockInfo.lockRelId;
|
|
|
|
bufHdr = &BufferDescriptors[buffer - 1];
|
|
SpinAcquire(BufMgrLock);
|
|
if (bufHdr->tag.blockNum == blockNumber &&
|
|
bufHdr->tag.relId.relId == lrelId->relId &&
|
|
bufHdr->tag.relId.dbId == lrelId->dbId)
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
return buffer;
|
|
}
|
|
return ReadBufferWithBufferLock(relation, blockNumber, true);
|
|
}
|
|
else
|
|
{
|
|
bufHdr = &LocalBufferDescriptors[-buffer - 1];
|
|
if (bufHdr->tag.relId.relId == RelationGetRelid(relation) &&
|
|
bufHdr->tag.blockNum == blockNumber)
|
|
return buffer;
|
|
}
|
|
}
|
|
return ReadBuffer(relation, blockNumber);
|
|
}
|
|
|
|
/*
|
|
* ReadBuffer -- returns a buffer containing the requested
|
|
* block of the requested relation. If the blknum
|
|
* requested is P_NEW, extend the relation file and
|
|
* allocate a new block.
|
|
*
|
|
* Returns: the buffer number for the buffer containing
|
|
* the block read or NULL on an error.
|
|
*
|
|
* Assume when this function is called, that reln has been
|
|
* opened already.
|
|
*/
|
|
|
|
#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG
|
|
* defined */
|
|
|
|
/*
|
|
* ReadBuffer
|
|
*
|
|
*/
|
|
Buffer
|
|
ReadBuffer(Relation reln, BlockNumber blockNum)
|
|
{
|
|
return ReadBufferWithBufferLock(reln, blockNum, false);
|
|
}
|
|
|
|
/*
|
|
* is_userbuffer
|
|
*
|
|
* XXX caller must have already acquired BufMgrLock
|
|
*/
|
|
#ifdef NOT_USED
|
|
static bool
|
|
is_userbuffer(Buffer buffer)
|
|
{
|
|
BufferDesc *buf = &BufferDescriptors[buffer - 1];
|
|
|
|
if (IsSystemRelationName(buf->blind.relname))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef NOT_USED
|
|
Buffer
|
|
ReadBuffer_Debug(char *file,
|
|
int line,
|
|
Relation reln,
|
|
BlockNumber blockNum)
|
|
{
|
|
Buffer buffer;
|
|
|
|
buffer = ReadBufferWithBufferLock(reln, blockNum, false);
|
|
if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
|
|
{
|
|
BufferDesc *buf = &BufferDescriptors[buffer - 1];
|
|
|
|
fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \
|
|
refcount = %ld, file: %s, line: %d\n",
|
|
buffer, buf->blind.relname, buf->tag.blockNum,
|
|
PrivateRefCount[buffer - 1], file, line);
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* ReadBufferWithBufferLock -- does the work of
|
|
* ReadBuffer() but with the possibility that
|
|
* the buffer lock has already been held. this
|
|
* is yet another effort to reduce the number of
|
|
* semops in the system.
|
|
*/
|
|
static Buffer
|
|
ReadBufferWithBufferLock(Relation reln,
|
|
BlockNumber blockNum,
|
|
bool bufferLockHeld)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
int extend; /* extending the file by one block */
|
|
int status;
|
|
bool found;
|
|
bool isLocalBuf;
|
|
|
|
extend = (blockNum == P_NEW);
|
|
isLocalBuf = reln->rd_myxactonly;
|
|
|
|
if (isLocalBuf)
|
|
{
|
|
ReadLocalBufferCount++;
|
|
bufHdr = LocalBufferAlloc(reln, blockNum, &found);
|
|
if (found)
|
|
LocalBufferHitCount++;
|
|
}
|
|
else
|
|
{
|
|
ReadBufferCount++;
|
|
|
|
/*
|
|
* lookup the buffer. IO_IN_PROGRESS is set if the requested
|
|
* block is not currently in memory.
|
|
*/
|
|
bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld);
|
|
if (found)
|
|
BufferHitCount++;
|
|
}
|
|
|
|
if (!bufHdr)
|
|
return InvalidBuffer;
|
|
|
|
/* if it's already in the buffer pool, we're done */
|
|
if (found)
|
|
{
|
|
|
|
/*
|
|
* This happens when a bogus buffer was returned previously and is
|
|
* floating around in the buffer pool. A routine calling this
|
|
* would want this extended.
|
|
*/
|
|
if (extend)
|
|
{
|
|
/* new buffers are zero-filled */
|
|
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
|
|
smgrextend(DEFAULT_SMGR, reln,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
}
|
|
return BufferDescriptorGetBuffer(bufHdr);
|
|
|
|
}
|
|
|
|
/*
|
|
* if we have gotten to this point, the reln pointer must be ok and
|
|
* the relation file must be open.
|
|
*/
|
|
if (extend)
|
|
{
|
|
/* new buffers are zero-filled */
|
|
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
|
|
status = smgrextend(DEFAULT_SMGR, reln,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
}
|
|
else
|
|
{
|
|
status = smgrread(DEFAULT_SMGR, reln, blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
}
|
|
|
|
if (isLocalBuf)
|
|
return BufferDescriptorGetBuffer(bufHdr);
|
|
|
|
/* lock buffer manager again to update IO IN PROGRESS */
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
if (status == SM_FAIL)
|
|
{
|
|
/* IO Failed. cleanup the data structures and go home */
|
|
|
|
if (!BufTableDelete(bufHdr))
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
elog(FATAL, "BufRead: buffer table broken after IO error\n");
|
|
}
|
|
/* remember that BufferAlloc() pinned the buffer */
|
|
UnpinBuffer(bufHdr);
|
|
|
|
/*
|
|
* Have to reset the flag so that anyone waiting for the buffer
|
|
* can tell that the contents are invalid.
|
|
*/
|
|
bufHdr->flags |= BM_IO_ERROR;
|
|
bufHdr->flags &= ~BM_IO_IN_PROGRESS;
|
|
}
|
|
else
|
|
{
|
|
/* IO Succeeded. clear the flags, finish buffer update */
|
|
|
|
bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
|
|
}
|
|
|
|
/* If anyone was waiting for IO to complete, wake them up now */
|
|
TerminateBufferIO(bufHdr);
|
|
|
|
SpinRelease(BufMgrLock);
|
|
|
|
if (status == SM_FAIL)
|
|
return InvalidBuffer;
|
|
|
|
return BufferDescriptorGetBuffer(bufHdr);
|
|
}
|
|
|
|
/*
|
|
* BufferAlloc -- Get a buffer from the buffer pool but dont
|
|
* read it.
|
|
*
|
|
* Returns: descriptor for buffer
|
|
*
|
|
* When this routine returns, the BufMgrLock is guaranteed NOT be held.
|
|
*/
|
|
static BufferDesc *
|
|
BufferAlloc(Relation reln,
|
|
BlockNumber blockNum,
|
|
bool *foundPtr,
|
|
bool bufferLockHeld)
|
|
{
|
|
BufferDesc *buf,
|
|
*buf2;
|
|
BufferTag newTag; /* identity of requested block */
|
|
bool inProgress; /* buffer undergoing IO */
|
|
bool newblock = FALSE;
|
|
|
|
/* create a new tag so we can lookup the buffer */
|
|
/* assume that the relation is already open */
|
|
if (blockNum == P_NEW)
|
|
{
|
|
newblock = TRUE;
|
|
blockNum = smgrnblocks(DEFAULT_SMGR, reln);
|
|
}
|
|
|
|
INIT_BUFFERTAG(&newTag, reln, blockNum);
|
|
|
|
if (!bufferLockHeld)
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
/* see if the block is in the buffer pool already */
|
|
buf = BufTableLookup(&newTag);
|
|
if (buf != NULL)
|
|
{
|
|
|
|
/*
|
|
* Found it. Now, (a) pin the buffer so no one steals it from the
|
|
* buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
|
|
* the buffer into the buffer pool.
|
|
*/
|
|
|
|
PinBuffer(buf);
|
|
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
|
|
|
|
*foundPtr = TRUE;
|
|
if (inProgress) /* confirm end of IO */
|
|
{
|
|
WaitIO(buf, BufMgrLock);
|
|
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
|
|
}
|
|
if (BUFFER_IS_BROKEN(buf))
|
|
{
|
|
|
|
/*
|
|
* I couldn't understand the following old comment. If there's
|
|
* no IO for the buffer and the buffer is BROKEN,it should be
|
|
* read again. So start a new buffer IO here.
|
|
*
|
|
* wierd race condition:
|
|
*
|
|
* We were waiting for someone else to read the buffer. While we
|
|
* were waiting, the reader boof'd in some way, so the
|
|
* contents of the buffer are still invalid. By saying that
|
|
* we didn't find it, we can make the caller reinitialize the
|
|
* buffer. If two processes are waiting for this block, both
|
|
* will read the block. The second one to finish may
|
|
* overwrite any updates made by the first. (Assume higher
|
|
* level synchronization prevents this from happening).
|
|
*
|
|
* This is never going to happen, don't worry about it.
|
|
*/
|
|
*foundPtr = FALSE;
|
|
}
|
|
#ifdef BMTRACE
|
|
_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);
|
|
#endif /* BMTRACE */
|
|
|
|
if (!(*foundPtr))
|
|
StartBufferIO(buf, true);
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return buf;
|
|
}
|
|
|
|
*foundPtr = FALSE;
|
|
|
|
/*
|
|
* Didn't find it in the buffer pool. We'll have to initialize a new
|
|
* buffer. First, grab one from the free list. If it's dirty, flush
|
|
* it to disk. Remember to unlock BufMgr spinlock while doing the IOs.
|
|
*/
|
|
inProgress = FALSE;
|
|
for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
|
|
{
|
|
buf = GetFreeBuffer();
|
|
|
|
/* GetFreeBuffer will abort if it can't find a free buffer */
|
|
Assert(buf);
|
|
|
|
/*
|
|
* There should be exactly one pin on the buffer after it is
|
|
* allocated -- ours. If it had a pin it wouldn't have been on
|
|
* the free list. No one else could have pinned it between
|
|
* GetFreeBuffer and here because we have the BufMgrLock.
|
|
*/
|
|
Assert(buf->refcount == 0);
|
|
buf->refcount = 1;
|
|
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
|
|
|
|
if (buf->flags & BM_DIRTY)
|
|
{
|
|
bool smok;
|
|
|
|
/*
|
|
* skip write error buffers
|
|
*/
|
|
if ((buf->flags & BM_IO_ERROR) != 0)
|
|
{
|
|
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
|
|
buf->refcount--;
|
|
buf = (BufferDesc *) NULL;
|
|
continue;
|
|
}
|
|
/*
|
|
* Set BM_IO_IN_PROGRESS to keep anyone from doing anything
|
|
* with the contents of the buffer while we write it out. We
|
|
* don't really care if they try to read it, but if they can
|
|
* complete a BufferAlloc on it they can then scribble into
|
|
* it, and we'd really like to avoid that while we are
|
|
* flushing the buffer. Setting this flag should block them
|
|
* in WaitIO until we're done.
|
|
*/
|
|
inProgress = TRUE;
|
|
|
|
/*
|
|
* All code paths that acquire this lock pin the buffer first;
|
|
* since no one had it pinned (it just came off the free
|
|
* list), no one else can have this lock.
|
|
*/
|
|
StartBufferIO(buf, false);
|
|
|
|
/*
|
|
* Write the buffer out, being careful to release BufMgrLock
|
|
* before starting the I/O.
|
|
*/
|
|
smok = BufferReplace(buf);
|
|
|
|
if (smok == FALSE)
|
|
{
|
|
elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s",
|
|
buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
|
|
inProgress = FALSE;
|
|
buf->flags |= BM_IO_ERROR;
|
|
buf->flags &= ~BM_IO_IN_PROGRESS;
|
|
TerminateBufferIO(buf);
|
|
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
|
|
Assert(buf->refcount > 0);
|
|
buf->refcount--;
|
|
if (buf->refcount == 0)
|
|
{
|
|
AddBufferToFreelist(buf);
|
|
buf->flags |= BM_FREE;
|
|
}
|
|
buf = (BufferDesc *) NULL;
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
|
|
* be setted by anyone. - vadim 01/17/97
|
|
*/
|
|
if (buf->flags & BM_JUST_DIRTIED)
|
|
{
|
|
elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing",
|
|
buf->tag.blockNum, buf->blind.relname);
|
|
}
|
|
else
|
|
buf->flags &= ~BM_DIRTY;
|
|
}
|
|
|
|
/*
|
|
* Somebody could have pinned the buffer while we were doing
|
|
* the I/O and had given up the BufMgrLock (though they would
|
|
* be waiting for us to clear the BM_IO_IN_PROGRESS flag).
|
|
* That's why this is a loop -- if so, we need to clear the
|
|
* I/O flags, remove our pin and start all over again.
|
|
*
|
|
* People may be making buffers free at any time, so there's no
|
|
* reason to think that we have an immediate disaster on our
|
|
* hands.
|
|
*/
|
|
if (buf && buf->refcount > 1)
|
|
{
|
|
inProgress = FALSE;
|
|
buf->flags &= ~BM_IO_IN_PROGRESS;
|
|
TerminateBufferIO(buf);
|
|
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
|
|
buf->refcount--;
|
|
buf = (BufferDesc *) NULL;
|
|
}
|
|
|
|
/*
|
|
* Somebody could have allocated another buffer for the same
|
|
* block we are about to read in. (While we flush out the
|
|
* dirty buffer, we don't hold the lock and someone could have
|
|
* allocated another buffer for the same block. The problem is
|
|
* we haven't gotten around to insert the new tag into the
|
|
* buffer table. So we need to check here. -ay 3/95
|
|
*/
|
|
buf2 = BufTableLookup(&newTag);
|
|
if (buf2 != NULL)
|
|
{
|
|
|
|
/*
|
|
* Found it. Someone has already done what we're about to
|
|
* do. We'll just handle this as if it were found in the
|
|
* buffer pool in the first place.
|
|
*/
|
|
if (buf != NULL)
|
|
{
|
|
buf->flags &= ~BM_IO_IN_PROGRESS;
|
|
TerminateBufferIO(buf);
|
|
/* give up the buffer since we don't need it any more */
|
|
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
|
|
Assert(buf->refcount > 0);
|
|
buf->refcount--;
|
|
if (buf->refcount == 0)
|
|
{
|
|
AddBufferToFreelist(buf);
|
|
buf->flags |= BM_FREE;
|
|
}
|
|
}
|
|
|
|
PinBuffer(buf2);
|
|
inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
|
|
|
|
*foundPtr = TRUE;
|
|
if (inProgress)
|
|
{
|
|
WaitIO(buf2, BufMgrLock);
|
|
inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
|
|
}
|
|
if (BUFFER_IS_BROKEN(buf2))
|
|
*foundPtr = FALSE;
|
|
|
|
if (!(*foundPtr))
|
|
StartBufferIO(buf2, true);
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return buf2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* At this point we should have the sole pin on a non-dirty buffer and
|
|
* we may or may not already have the BM_IO_IN_PROGRESS flag set.
|
|
*/
|
|
|
|
/*
|
|
* Change the name of the buffer in the lookup table:
|
|
*
|
|
* Need to update the lookup table before the read starts. If someone
|
|
* comes along looking for the buffer while we are reading it in, we
|
|
* don't want them to allocate a new buffer. For the same reason, we
|
|
* didn't want to erase the buf table entry for the buffer we were
|
|
* writing back until now, either.
|
|
*/
|
|
|
|
if (!BufTableDelete(buf))
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
elog(FATAL, "buffer wasn't in the buffer table\n");
|
|
}
|
|
|
|
/* record the database name and relation name for this buffer */
|
|
strcpy(buf->blind.dbname, DatabaseName);
|
|
strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln));
|
|
|
|
INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
|
|
if (!BufTableInsert(buf))
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
elog(FATAL, "Buffer in lookup table twice \n");
|
|
}
|
|
|
|
/*
|
|
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
|
|
* so no one fiddles with them until the read completes. If this
|
|
* routine has been called simply to allocate a buffer, no io will be
|
|
* attempted, so the flag isnt set.
|
|
*/
|
|
if (!inProgress)
|
|
StartBufferIO(buf, true);
|
|
else
|
|
ContinueBufferIO(buf, true);
|
|
|
|
#ifdef BMTRACE
|
|
_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
|
|
#endif /* BMTRACE */
|
|
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return buf;
|
|
}
|
|
|
|
/*
|
|
* WriteBuffer
|
|
*
|
|
* Marks buffer contents as dirty (actual write happens later).
|
|
*
|
|
* Assume that buffer is pinned. Assume that reln is
|
|
* valid.
|
|
*
|
|
* Side Effects:
|
|
* Pin count is decremented.
|
|
*/
|
|
|
|
#undef WriteBuffer
|
|
|
|
int
|
|
WriteBuffer(Buffer buffer)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
|
|
if (BufferIsLocal(buffer))
|
|
return WriteLocalBuffer(buffer, TRUE);
|
|
|
|
if (BAD_BUFFER_ID(buffer))
|
|
return FALSE;
|
|
|
|
bufHdr = &BufferDescriptors[buffer - 1];
|
|
|
|
SharedBufferChanged = true;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
Assert(bufHdr->refcount > 0);
|
|
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
|
SetBufferDirtiedByMe(buffer, bufHdr);
|
|
UnpinBuffer(bufHdr);
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
#ifdef NOT_USED
|
|
void
|
|
WriteBuffer_Debug(char *file, int line, Buffer buffer)
|
|
{
|
|
WriteBuffer(buffer);
|
|
if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
|
|
{
|
|
BufferDesc *buf;
|
|
|
|
buf = &BufferDescriptors[buffer - 1];
|
|
fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \
|
|
refcount = %ld, file: %s, line: %d\n",
|
|
buffer, buf->blind.relname, buf->tag.blockNum,
|
|
PrivateRefCount[buffer - 1], file, line);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is
|
|
* in the cache and is dirty, mark it clean and copy
|
|
* it to the requested location. This is a logical
|
|
* write, and has been installed to support the cache
|
|
* management code for write-once storage managers.
|
|
*
|
|
* DirtyBufferCopy() -- Copy a given dirty buffer to the requested
|
|
* destination.
|
|
*
|
|
* We treat this as a write. If the requested buffer is in the pool
|
|
* and is dirty, we copy it to the location requested and mark it
|
|
* clean. This routine supports the Sony jukebox storage manager,
|
|
* which agrees to take responsibility for the data once we mark
|
|
* it clean.
|
|
*
|
|
* NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95
|
|
*/
|
|
#ifdef NOT_USED
|
|
void
|
|
DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest)
|
|
{
|
|
BufferDesc *buf;
|
|
BufferTag btag;
|
|
|
|
btag.relId.relId = relid;
|
|
btag.relId.dbId = dbid;
|
|
btag.blockNum = blkno;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
buf = BufTableLookup(&btag);
|
|
|
|
if (buf == (BufferDesc *) NULL
|
|
|| !(buf->flags & BM_DIRTY)
|
|
|| !(buf->flags & BM_VALID))
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* hate to do this holding the lock, but release and reacquire is
|
|
* slower
|
|
*/
|
|
memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ);
|
|
|
|
buf->flags &= ~BM_DIRTY;
|
|
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* FlushBuffer -- like WriteBuffer, but force the page to disk.
|
|
*
|
|
* 'buffer' is known to be dirty/pinned, so there should not be a
|
|
* problem reading the BufferDesc members without the BufMgrLock
|
|
* (nobody should be able to change tags out from under us).
|
|
*
|
|
* Unpin if 'release' is TRUE.
|
|
*/
|
|
int
|
|
FlushBuffer(Buffer buffer, bool release)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
Oid bufdb;
|
|
Relation bufrel;
|
|
int status;
|
|
|
|
if (BufferIsLocal(buffer))
|
|
return FlushLocalBuffer(buffer, release) ? STATUS_OK : STATUS_ERROR;
|
|
|
|
if (BAD_BUFFER_ID(buffer))
|
|
return STATUS_ERROR;
|
|
|
|
Assert(PrivateRefCount[buffer - 1] > 0); /* else caller didn't pin */
|
|
|
|
bufHdr = &BufferDescriptors[buffer - 1];
|
|
bufdb = bufHdr->tag.relId.dbId;
|
|
|
|
Assert(bufdb == MyDatabaseId || bufdb == (Oid) NULL);
|
|
bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
|
|
Assert(bufrel != (Relation) NULL);
|
|
|
|
SharedBufferChanged = true;
|
|
|
|
/* To check if block content changed while flushing. - vadim 01/17/97 */
|
|
SpinAcquire(BufMgrLock);
|
|
WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */
|
|
bufHdr->flags &= ~BM_JUST_DIRTIED;
|
|
StartBufferIO(bufHdr, false); /* output IO start */
|
|
|
|
SpinRelease(BufMgrLock);
|
|
|
|
/*
|
|
* Grab a read lock on the buffer to ensure that no
|
|
* other backend changes its contents while we write it;
|
|
* see comments in BufferSync().
|
|
*/
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE);
|
|
|
|
status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK);
|
|
|
|
/* drop relcache refcnt incremented by RelationIdCacheGetRelation */
|
|
RelationDecrementReferenceCount(bufrel);
|
|
|
|
if (status == SM_FAIL)
|
|
{
|
|
elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s",
|
|
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
|
return STATUS_ERROR;
|
|
}
|
|
BufferFlushCount++;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
|
|
TerminateBufferIO(bufHdr); /* output IO finished */
|
|
|
|
/*
|
|
* If this buffer was marked by someone as DIRTY while we were
|
|
* flushing it out we must not clear shared DIRTY flag - vadim
|
|
* 01/17/97
|
|
*
|
|
* ... but we can clear BufferDirtiedByMe anyway - tgl 3/31/00
|
|
*/
|
|
if (bufHdr->flags & BM_JUST_DIRTIED)
|
|
{
|
|
elog(NOTICE, "FlushBuffer: content of block %u (%s) changed while flushing",
|
|
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
|
}
|
|
else
|
|
bufHdr->flags &= ~BM_DIRTY;
|
|
ClearBufferDirtiedByMe(buffer, bufHdr);
|
|
if (release)
|
|
UnpinBuffer(bufHdr);
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return STATUS_OK;
|
|
}
|
|
|
|
/*
|
|
* WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
|
|
* when the operation is complete.
|
|
*/
|
|
int
|
|
WriteNoReleaseBuffer(Buffer buffer)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
|
|
if (BufferIsLocal(buffer))
|
|
return WriteLocalBuffer(buffer, FALSE);
|
|
|
|
if (BAD_BUFFER_ID(buffer))
|
|
return STATUS_ERROR;
|
|
|
|
bufHdr = &BufferDescriptors[buffer - 1];
|
|
|
|
SharedBufferChanged = true;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
Assert(bufHdr->refcount > 0);
|
|
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
|
SetBufferDirtiedByMe(buffer, bufHdr);
|
|
SpinRelease(BufMgrLock);
|
|
|
|
return STATUS_OK;
|
|
}
|
|
|
|
|
|
#undef ReleaseAndReadBuffer
|
|
/*
|
|
* ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
|
|
* so that only one semop needs to be called.
|
|
*
|
|
*/
|
|
Buffer
|
|
ReleaseAndReadBuffer(Buffer buffer,
|
|
Relation relation,
|
|
BlockNumber blockNum)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
Buffer retbuf;
|
|
|
|
if (BufferIsLocal(buffer))
|
|
{
|
|
Assert(LocalRefCount[-buffer - 1] > 0);
|
|
LocalRefCount[-buffer - 1]--;
|
|
}
|
|
else
|
|
{
|
|
if (BufferIsValid(buffer))
|
|
{
|
|
bufHdr = &BufferDescriptors[buffer - 1];
|
|
Assert(PrivateRefCount[buffer - 1] > 0);
|
|
PrivateRefCount[buffer - 1]--;
|
|
if (PrivateRefCount[buffer - 1] == 0)
|
|
{
|
|
SpinAcquire(BufMgrLock);
|
|
Assert(bufHdr->refcount > 0);
|
|
bufHdr->refcount--;
|
|
if (bufHdr->refcount == 0)
|
|
{
|
|
AddBufferToFreelist(bufHdr);
|
|
bufHdr->flags |= BM_FREE;
|
|
}
|
|
retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
|
|
return retbuf;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ReadBuffer(relation, blockNum);
|
|
}
|
|
|
|
/*
|
|
* SetBufferDirtiedByMe -- mark a shared buffer as being dirtied by this xact
|
|
*
|
|
* This flag essentially remembers that we need to write and fsync this buffer
|
|
* before we can commit the transaction. The write might end up getting done
|
|
* by another backend, but we must do the fsync ourselves (else we could
|
|
* commit before the data actually reaches disk). We do not issue fsync
|
|
* instantly upon write; the storage manager keeps track of which files need
|
|
* to be fsync'd before commit can occur. A key aspect of this data structure
|
|
* is that we will be able to notify the storage manager that an fsync is
|
|
* needed even after another backend has done the physical write and replaced
|
|
* the buffer contents with something else!
|
|
*
|
|
* NB: we must be holding the bufmgr lock at entry, and the buffer must be
|
|
* pinned so that no other backend can take it away from us.
|
|
*/
|
|
static void
|
|
SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
|
|
{
|
|
BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1];
|
|
Relation reln;
|
|
int status;
|
|
|
|
/*
|
|
* If the flag is already set, check to see whether the buffertag is
|
|
* the same. If not, some other backend already wrote the buffer data
|
|
* that we dirtied. We must tell the storage manager to make an fsync
|
|
* pending on that file before we can overwrite the old tag value.
|
|
*/
|
|
if (BufferDirtiedByMe[buffer - 1])
|
|
{
|
|
if (bufHdr->tag.relId.dbId == tagLastDirtied->relId.dbId &&
|
|
bufHdr->tag.relId.relId == tagLastDirtied->relId.relId &&
|
|
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
|
|
return; /* Same tag already dirtied, so no work */
|
|
|
|
#ifndef OPTIMIZE_SINGLE
|
|
SpinRelease(BufMgrLock);
|
|
#endif /* OPTIMIZE_SINGLE */
|
|
|
|
reln = RelationIdCacheGetRelation(tagLastDirtied->relId.relId);
|
|
|
|
if (reln == (Relation) NULL)
|
|
{
|
|
status = smgrblindmarkdirty(DEFAULT_SMGR,
|
|
BufferBlindLastDirtied[buffer - 1].dbname,
|
|
BufferBlindLastDirtied[buffer - 1].relname,
|
|
tagLastDirtied->relId.dbId,
|
|
tagLastDirtied->relId.relId,
|
|
tagLastDirtied->blockNum);
|
|
}
|
|
else
|
|
{
|
|
status = smgrmarkdirty(DEFAULT_SMGR, reln,
|
|
tagLastDirtied->blockNum);
|
|
|
|
/*
|
|
* drop relcache refcnt incremented by
|
|
* RelationIdCacheGetRelation
|
|
*/
|
|
RelationDecrementReferenceCount(reln);
|
|
}
|
|
if (status == SM_FAIL)
|
|
{
|
|
elog(ERROR, "SetBufferDirtiedByMe: cannot mark %u for %s",
|
|
tagLastDirtied->blockNum,
|
|
BufferBlindLastDirtied[buffer - 1].relname);
|
|
}
|
|
|
|
#ifndef OPTIMIZE_SINGLE
|
|
SpinAcquire(BufMgrLock);
|
|
#endif /* OPTIMIZE_SINGLE */
|
|
|
|
}
|
|
|
|
*tagLastDirtied = bufHdr->tag;
|
|
BufferBlindLastDirtied[buffer - 1] = bufHdr->blind;
|
|
BufferDirtiedByMe[buffer - 1] = true;
|
|
}
|
|
|
|
/*
|
|
* ClearBufferDirtiedByMe -- mark a shared buffer as no longer needing fsync
|
|
*
|
|
* If we write out a buffer ourselves, then the storage manager will set its
|
|
* needs-fsync flag for that file automatically, and so we can clear our own
|
|
* flag that says it needs to be done later.
|
|
*
|
|
* NB: we must be holding the bufmgr lock at entry.
|
|
*/
|
|
static void
|
|
ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
|
|
{
|
|
BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1];
|
|
|
|
/*
|
|
* Do *not* clear the flag if it refers to some other buffertag than
|
|
* the data we just wrote. This is unlikely, but possible if some
|
|
* other backend replaced the buffer contents since we set our flag.
|
|
*/
|
|
if (bufHdr->tag.relId.dbId == tagLastDirtied->relId.dbId &&
|
|
bufHdr->tag.relId.relId == tagLastDirtied->relId.relId &&
|
|
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
|
|
BufferDirtiedByMe[buffer - 1] = false;
|
|
}
|
|
|
|
/*
|
|
* BufferSync -- Flush all dirty buffers in the pool.
|
|
*
|
|
* This is called at transaction commit time. We find all buffers
|
|
* that have been dirtied by the current xact and flush them to disk.
|
|
* We do *not* flush dirty buffers that have been dirtied by other xacts.
|
|
* (This is a substantial change from pre-7.0 behavior.)
|
|
*/
|
|
static void
|
|
BufferSync()
|
|
{
|
|
int i;
|
|
BufferDesc *bufHdr;
|
|
int status;
|
|
Relation reln;
|
|
bool didwrite;
|
|
|
|
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
|
|
{
|
|
/* Ignore buffers that were not dirtied by me */
|
|
if (!BufferDirtiedByMe[i])
|
|
continue;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
/*
|
|
* We only need to write if the buffer is still dirty and still
|
|
* contains the same disk page that it contained when we dirtied
|
|
* it. Otherwise, someone else has already written our changes for
|
|
* us, and we need only fsync.
|
|
*
|
|
* (NOTE: it's still possible to do an unnecessary write, if other
|
|
* xacts have written and then re-dirtied the page since our last
|
|
* change to it. But that should be pretty uncommon, and there's
|
|
* no easy way to detect it anyway.)
|
|
*/
|
|
reln = NULL;
|
|
didwrite = false;
|
|
if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
|
|
{
|
|
Oid bufdb;
|
|
Oid bufrel;
|
|
|
|
bufdb = bufHdr->tag.relId.dbId;
|
|
bufrel = bufHdr->tag.relId.relId;
|
|
if (bufdb == BufferTagLastDirtied[i].relId.dbId &&
|
|
bufrel == BufferTagLastDirtied[i].relId.relId &&
|
|
bufHdr->tag.blockNum == BufferTagLastDirtied[i].blockNum)
|
|
{
|
|
|
|
/*
|
|
* Try to find relation for buf. This could fail, if the
|
|
* rel has been flushed from the relcache since we dirtied
|
|
* the page. That should be uncommon, so paying the extra
|
|
* cost of a blind write when it happens seems OK.
|
|
*/
|
|
reln = RelationIdCacheGetRelation(bufrel);
|
|
|
|
/*
|
|
* We have to pin buffer to keep anyone from stealing it
|
|
* from the buffer pool while we are flushing it or
|
|
* waiting in WaitIO. It's bad for GetFreeBuffer in
|
|
* BufferAlloc, but there is no other way to prevent
|
|
* writing into disk block data from some other buffer,
|
|
* getting smgr status of some other block and clearing
|
|
* BM_DIRTY of ... - VAdim 09/16/96
|
|
*/
|
|
PinBuffer(bufHdr);
|
|
if (bufHdr->flags & BM_IO_IN_PROGRESS)
|
|
{
|
|
WaitIO(bufHdr, BufMgrLock);
|
|
UnpinBuffer(bufHdr);
|
|
if (bufHdr->flags & BM_IO_ERROR)
|
|
{
|
|
elog(ERROR, "BufferSync: write error %u for %s",
|
|
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* To check if block content changed while flushing
|
|
* (see below). - vadim 01/17/97
|
|
*/
|
|
WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */
|
|
bufHdr->flags &= ~BM_JUST_DIRTIED;
|
|
StartBufferIO(bufHdr, false); /* output IO start */
|
|
|
|
SpinRelease(BufMgrLock);
|
|
|
|
/*
|
|
* Grab a read lock on the buffer to ensure that no
|
|
* other backend changes its contents while we write it;
|
|
* otherwise we could write a non-self-consistent page
|
|
* image to disk, which'd be bad news if the other
|
|
* transaction aborts before writing its changes.
|
|
*
|
|
* Note that we still need the BM_JUST_DIRTIED mechanism
|
|
* in case someone dirties the buffer just before we
|
|
* grab this lock or just after we release it.
|
|
*/
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr),
|
|
BUFFER_LOCK_SHARE);
|
|
|
|
/*
|
|
* If we didn't have the reldesc in our local cache,
|
|
* write this page out using the 'blind write' storage
|
|
* manager routine. If we did find it, use the
|
|
* standard interface.
|
|
*/
|
|
if (reln == (Relation) NULL)
|
|
{
|
|
status = smgrblindwrt(DEFAULT_SMGR,
|
|
bufHdr->blind.dbname,
|
|
bufHdr->blind.relname,
|
|
bufdb, bufrel,
|
|
bufHdr->tag.blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data),
|
|
true); /* must fsync */
|
|
}
|
|
else
|
|
{
|
|
status = smgrwrite(DEFAULT_SMGR, reln,
|
|
bufHdr->tag.blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
}
|
|
|
|
/*
|
|
* Release the per-buffer readlock, reacquire BufMgrLock.
|
|
*/
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr),
|
|
BUFFER_LOCK_UNLOCK);
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
UnpinBuffer(bufHdr);
|
|
if (status == SM_FAIL)
|
|
{
|
|
bufHdr->flags |= BM_IO_ERROR;
|
|
elog(ERROR, "BufferSync: cannot write %u for %s",
|
|
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
|
}
|
|
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
|
|
TerminateBufferIO(bufHdr); /* Sync IO finished */
|
|
BufferFlushCount++;
|
|
didwrite = true;
|
|
|
|
/*
|
|
* If this buffer was marked by someone as DIRTY while
|
|
* we were flushing it out we must not clear DIRTY
|
|
* flag - vadim 01/17/97
|
|
*
|
|
* but it is OK to clear BufferDirtiedByMe - tgl 3/31/00
|
|
*/
|
|
if (!(bufHdr->flags & BM_JUST_DIRTIED))
|
|
bufHdr->flags &= ~BM_DIRTY;
|
|
}
|
|
|
|
/* drop refcnt obtained by RelationIdCacheGetRelation */
|
|
if (reln != (Relation) NULL)
|
|
RelationDecrementReferenceCount(reln);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If we did not write the buffer (because someone else did), we
|
|
* must still fsync the file containing it, to ensure that the
|
|
* write is down to disk before we commit.
|
|
*/
|
|
if (!didwrite)
|
|
{
|
|
#ifndef OPTIMIZE_SINGLE
|
|
SpinRelease(BufMgrLock);
|
|
#endif /* OPTIMIZE_SINGLE */
|
|
|
|
reln = RelationIdCacheGetRelation(BufferTagLastDirtied[i].relId.relId);
|
|
if (reln == (Relation) NULL)
|
|
{
|
|
status = smgrblindmarkdirty(DEFAULT_SMGR,
|
|
BufferBlindLastDirtied[i].dbname,
|
|
BufferBlindLastDirtied[i].relname,
|
|
BufferTagLastDirtied[i].relId.dbId,
|
|
BufferTagLastDirtied[i].relId.relId,
|
|
BufferTagLastDirtied[i].blockNum);
|
|
}
|
|
else
|
|
{
|
|
status = smgrmarkdirty(DEFAULT_SMGR, reln,
|
|
BufferTagLastDirtied[i].blockNum);
|
|
|
|
/*
|
|
* drop relcache refcnt incremented by
|
|
* RelationIdCacheGetRelation
|
|
*/
|
|
RelationDecrementReferenceCount(reln);
|
|
|
|
}
|
|
#ifndef OPTIMIZE_SINGLE
|
|
SpinAcquire(BufMgrLock);
|
|
#endif /* OPTIMIZE_SINGLE */
|
|
}
|
|
|
|
BufferDirtiedByMe[i] = false;
|
|
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
|
|
LocalBufferSync();
|
|
}
|
|
|
|
|
|
/*
|
|
* WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
|
|
*
|
|
* Should be entered with buffer manager spinlock held; releases it before
|
|
* waiting and re-acquires it afterwards.
|
|
*
|
|
* OLD NOTES:
|
|
* Because IO_IN_PROGRESS conflicts are
|
|
* expected to be rare, there is only one BufferIO
|
|
* lock in the entire system. All processes block
|
|
* on this semaphore when they try to use a buffer
|
|
* that someone else is faulting in. Whenever a
|
|
* process finishes an IO and someone is waiting for
|
|
* the buffer, BufferIO is signaled (SignalIO). All
|
|
* waiting processes then wake up and check to see
|
|
* if their buffer is now ready. This implementation
|
|
* is simple, but efficient enough if WaitIO is
|
|
* rarely called by multiple processes simultaneously.
|
|
*
|
|
* NEW NOTES:
|
|
* The above is true only on machines without test-and-set
|
|
* semaphores (which we hope are few, these days). On better
|
|
* hardware, each buffer has a spinlock that we can wait on.
|
|
*/
|
|
#ifdef HAS_TEST_AND_SET
|
|
|
|
static void
|
|
WaitIO(BufferDesc *buf, SPINLOCK spinlock)
|
|
{
|
|
|
|
/*
|
|
* Changed to wait until there's no IO - Inoue 01/13/2000
|
|
*/
|
|
while ((buf->flags & BM_IO_IN_PROGRESS) != 0)
|
|
{
|
|
SpinRelease(spinlock);
|
|
S_LOCK(&(buf->io_in_progress_lock));
|
|
S_UNLOCK(&(buf->io_in_progress_lock));
|
|
SpinAcquire(spinlock);
|
|
}
|
|
}
|
|
|
|
#else /* !HAS_TEST_AND_SET */
|
|
|
|
IpcSemaphoreId WaitIOSemId;
|
|
IpcSemaphoreId WaitCLSemId;
|
|
|
|
static void
|
|
WaitIO(BufferDesc *buf, SPINLOCK spinlock)
|
|
{
|
|
bool inProgress;
|
|
|
|
for (;;)
|
|
{
|
|
|
|
/* wait until someone releases IO lock */
|
|
(*NWaitIOBackendP)++;
|
|
SpinRelease(spinlock);
|
|
IpcSemaphoreLock(WaitIOSemId, 0, 1);
|
|
SpinAcquire(spinlock);
|
|
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
|
|
if (!inProgress)
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* SignalIO
|
|
*/
|
|
static void
|
|
SignalIO(BufferDesc *buf)
|
|
{
|
|
/* somebody better be waiting. */
|
|
Assert(buf->refcount > 1);
|
|
IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP);
|
|
*NWaitIOBackendP = 0;
|
|
}
|
|
|
|
#endif /* HAS_TEST_AND_SET */
|
|
|
|
long NDirectFileRead; /* some I/O's are direct file access.
|
|
* bypass bufmgr */
|
|
long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
|
|
|
|
void
|
|
PrintBufferUsage(FILE *statfp)
|
|
{
|
|
float hitrate;
|
|
float localhitrate;
|
|
|
|
if (ReadBufferCount == 0)
|
|
hitrate = 0.0;
|
|
else
|
|
hitrate = (float) BufferHitCount *100.0 / ReadBufferCount;
|
|
|
|
if (ReadLocalBufferCount == 0)
|
|
localhitrate = 0.0;
|
|
else
|
|
localhitrate = (float) LocalBufferHitCount *100.0 / ReadLocalBufferCount;
|
|
|
|
fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
|
|
ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate);
|
|
fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
|
|
ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate);
|
|
fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n",
|
|
NDirectFileRead, NDirectFileWrite);
|
|
}
|
|
|
|
void
|
|
ResetBufferUsage()
|
|
{
|
|
BufferHitCount = 0;
|
|
ReadBufferCount = 0;
|
|
BufferFlushCount = 0;
|
|
LocalBufferHitCount = 0;
|
|
ReadLocalBufferCount = 0;
|
|
LocalBufferFlushCount = 0;
|
|
NDirectFileRead = 0;
|
|
NDirectFileWrite = 0;
|
|
}
|
|
|
|
/* ----------------------------------------------
|
|
* ResetBufferPool
|
|
*
|
|
* This routine is supposed to be called when a transaction aborts.
|
|
* it will release all the buffer pins held by the transaction.
|
|
* Currently, we also call it during commit if BufferPoolCheckLeak
|
|
* detected a problem --- in that case, isCommit is TRUE, and we
|
|
* only clean up buffer pin counts.
|
|
*
|
|
* During abort, we also forget any pending fsync requests. Dirtied buffers
|
|
* will still get written, eventually, but there will be no fsync for them.
|
|
*
|
|
* ----------------------------------------------
|
|
*/
|
|
void
|
|
ResetBufferPool(bool isCommit)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < NBuffers; i++)
|
|
{
|
|
if (PrivateRefCount[i] != 0)
|
|
{
|
|
BufferDesc *buf = &BufferDescriptors[i];
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
Assert(buf->refcount > 0);
|
|
buf->refcount--;
|
|
if (buf->refcount == 0)
|
|
{
|
|
AddBufferToFreelist(buf);
|
|
buf->flags |= BM_FREE;
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
PrivateRefCount[i] = 0;
|
|
|
|
if (!isCommit)
|
|
BufferDirtiedByMe[i] = false;
|
|
}
|
|
|
|
ResetLocalBufferPool();
|
|
|
|
if (!isCommit)
|
|
smgrabort();
|
|
}
|
|
|
|
/* -----------------------------------------------
|
|
* BufferPoolCheckLeak
|
|
*
|
|
* check if there is buffer leak
|
|
*
|
|
* -----------------------------------------------
|
|
*/
|
|
int
|
|
BufferPoolCheckLeak()
|
|
{
|
|
int i;
|
|
int result = 0;
|
|
|
|
for (i = 1; i <= NBuffers; i++)
|
|
{
|
|
if (PrivateRefCount[i - 1] != 0)
|
|
{
|
|
BufferDesc *buf = &(BufferDescriptors[i - 1]);
|
|
|
|
elog(NOTICE,
|
|
"Buffer Leak: [%03d] (freeNext=%ld, freePrev=%ld, \
|
|
relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
|
|
i - 1, buf->freeNext, buf->freePrev,
|
|
buf->blind.relname, buf->tag.blockNum, buf->flags,
|
|
buf->refcount, PrivateRefCount[i - 1]);
|
|
result = 1;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/* ------------------------------------------------
|
|
* FlushBufferPool
|
|
*
|
|
* flush all dirty blocks in buffer pool to disk
|
|
*
|
|
* ------------------------------------------------
|
|
*/
|
|
void
|
|
FlushBufferPool(void)
|
|
{
|
|
BufferSync();
|
|
smgrcommit();
|
|
}
|
|
|
|
/*
|
|
* BufferGetBlockNumber
|
|
* Returns the block number associated with a buffer.
|
|
*
|
|
* Note:
|
|
* Assumes that the buffer is valid.
|
|
*/
|
|
BlockNumber
|
|
BufferGetBlockNumber(Buffer buffer)
|
|
{
|
|
Assert(BufferIsValid(buffer));
|
|
|
|
/* XXX should be a critical section */
|
|
if (BufferIsLocal(buffer))
|
|
return LocalBufferDescriptors[-buffer - 1].tag.blockNum;
|
|
else
|
|
return BufferDescriptors[buffer - 1].tag.blockNum;
|
|
}
|
|
|
|
#ifdef NOT_USED
|
|
/*
|
|
* BufferGetRelation
|
|
* Returns the relation desciptor associated with a buffer.
|
|
*
|
|
* Note:
|
|
* Assumes buffer is valid.
|
|
*/
|
|
Relation
|
|
BufferGetRelation(Buffer buffer)
|
|
{
|
|
Relation relation;
|
|
Oid relid;
|
|
|
|
Assert(BufferIsValid(buffer));
|
|
Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */
|
|
|
|
/* XXX should be a critical section */
|
|
relid = BufferDescriptors[buffer - 1].tag.relId.relId;
|
|
relation = RelationIdGetRelation(relid);
|
|
Assert(relation);
|
|
|
|
/* drop relcache refcnt incremented by RelationIdGetRelation */
|
|
RelationDecrementReferenceCount(relation);
|
|
|
|
if (RelationHasReferenceCountZero(relation))
|
|
{
|
|
|
|
/*
|
|
* XXX why??
|
|
*/
|
|
|
|
RelationIncrementReferenceCount(relation);
|
|
}
|
|
|
|
return relation;
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* BufferReplace
|
|
*
|
|
* Write out the buffer corresponding to 'bufHdr'
|
|
*
|
|
* This routine used to flush the data to disk (ie, force immediate fsync)
|
|
* but that's no longer necessary because BufferSync is smarter than before.
|
|
*
|
|
* BufMgrLock must be held at entry, and the buffer must be pinned.
|
|
*/
|
|
static int
|
|
BufferReplace(BufferDesc *bufHdr)
|
|
{
|
|
Relation reln;
|
|
Oid bufdb,
|
|
bufrel;
|
|
int status;
|
|
|
|
/*
|
|
* first try to find the reldesc in the cache, if no luck, don't
|
|
* bother to build the reldesc from scratch, just do a blind write.
|
|
*/
|
|
bufdb = bufHdr->tag.relId.dbId;
|
|
bufrel = bufHdr->tag.relId.relId;
|
|
|
|
if (bufdb == MyDatabaseId || bufdb == (Oid) NULL)
|
|
reln = RelationIdCacheGetRelation(bufrel);
|
|
else
|
|
reln = (Relation) NULL;
|
|
|
|
/* To check if block content changed while flushing. - vadim 01/17/97 */
|
|
bufHdr->flags &= ~BM_JUST_DIRTIED;
|
|
|
|
SpinRelease(BufMgrLock);
|
|
|
|
/*
|
|
* Grab a read lock on the buffer to ensure that no
|
|
* other backend changes its contents while we write it;
|
|
* see comments in BufferSync().
|
|
*/
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE);
|
|
|
|
if (reln != (Relation) NULL)
|
|
{
|
|
status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data));
|
|
}
|
|
else
|
|
{
|
|
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
|
|
bufHdr->blind.relname, bufdb, bufrel,
|
|
bufHdr->tag.blockNum,
|
|
(char *) MAKE_PTR(bufHdr->data),
|
|
false); /* no fsync */
|
|
}
|
|
|
|
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK);
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
/* drop relcache refcnt incremented by RelationIdCacheGetRelation */
|
|
if (reln != (Relation) NULL)
|
|
RelationDecrementReferenceCount(reln);
|
|
|
|
if (status == SM_FAIL)
|
|
return FALSE;
|
|
|
|
/*
|
|
* If we had marked this buffer as needing to be fsync'd, we can
|
|
* forget about that, because it's now the storage manager's
|
|
* responsibility (but only if we called smgrwrite, not smgrblindwrt).
|
|
*/
|
|
if (reln != (Relation) NULL)
|
|
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
|
|
|
|
BufferFlushCount++;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* RelationGetNumberOfBlocks
|
|
* Returns the buffer descriptor associated with a page in a relation.
|
|
*
|
|
* Note:
|
|
* XXX may fail for huge relations.
|
|
* XXX should be elsewhere.
|
|
* XXX maybe should be hidden
|
|
*/
|
|
BlockNumber
|
|
RelationGetNumberOfBlocks(Relation relation)
|
|
{
|
|
return ((relation->rd_myxactonly) ? relation->rd_nblocks :
|
|
smgrnblocks(DEFAULT_SMGR, relation));
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------
|
|
* ReleaseRelationBuffers
|
|
*
|
|
* This function removes all the buffered pages for a relation
|
|
* from the buffer pool. Dirty pages are simply dropped, without
|
|
* bothering to write them out first. This is used when the
|
|
* relation is about to be deleted. We assume that the caller
|
|
* holds an exclusive lock on the relation, which should assure
|
|
* that no new buffers will be acquired for the rel meanwhile.
|
|
*
|
|
* XXX currently it sequentially searches the buffer pool, should be
|
|
* changed to more clever ways of searching.
|
|
* --------------------------------------------------------------------
|
|
*/
|
|
void
|
|
ReleaseRelationBuffers(Relation rel)
|
|
{
|
|
Oid relid = RelationGetRelid(rel);
|
|
int i;
|
|
BufferDesc *buf;
|
|
|
|
if (rel->rd_myxactonly)
|
|
{
|
|
for (i = 0; i < NLocBuffer; i++)
|
|
{
|
|
buf = &LocalBufferDescriptors[i];
|
|
if (buf->tag.relId.relId == relid)
|
|
{
|
|
buf->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
|
|
LocalRefCount[i] = 0;
|
|
buf->tag.relId.relId = InvalidOid;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
for (i = 1; i <= NBuffers; i++)
|
|
{
|
|
buf = &BufferDescriptors[i - 1];
|
|
recheck:
|
|
if (buf->tag.relId.relId == relid &&
|
|
(buf->tag.relId.dbId == MyDatabaseId ||
|
|
buf->tag.relId.dbId == (Oid) NULL))
|
|
{
|
|
|
|
/*
|
|
* If there is I/O in progress, better wait till it's done;
|
|
* don't want to delete the relation out from under someone
|
|
* who's just trying to flush the buffer!
|
|
*/
|
|
if (buf->flags & BM_IO_IN_PROGRESS)
|
|
{
|
|
WaitIO(buf, BufMgrLock);
|
|
|
|
/*
|
|
* By now, the buffer very possibly belongs to some other
|
|
* rel, so check again before proceeding.
|
|
*/
|
|
goto recheck;
|
|
}
|
|
/* Now we can do what we came for */
|
|
buf->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
|
|
ClearBufferDirtiedByMe(i, buf);
|
|
|
|
/*
|
|
* Release any refcount we may have.
|
|
*
|
|
* This is very probably dead code, and if it isn't then it's
|
|
* probably wrong. I added the Assert to find out --- tgl
|
|
* 11/99.
|
|
*/
|
|
if (!(buf->flags & BM_FREE))
|
|
{
|
|
/* Assert checks that buffer will actually get freed! */
|
|
Assert(PrivateRefCount[i - 1] == 1 &&
|
|
buf->refcount == 1);
|
|
/* ReleaseBuffer expects we do not hold the lock at entry */
|
|
SpinRelease(BufMgrLock);
|
|
ReleaseBuffer(i);
|
|
SpinAcquire(BufMgrLock);
|
|
}
|
|
/*
|
|
* And mark the buffer as no longer occupied by this rel.
|
|
*/
|
|
BufTableDelete(buf);
|
|
}
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------
|
|
* DropBuffers
|
|
*
|
|
* This function removes all the buffers in the buffer cache for a
|
|
* particular database. Dirty pages are simply dropped, without
|
|
* bothering to write them out first. This is used when we destroy a
|
|
* database, to avoid trying to flush data to disk when the directory
|
|
* tree no longer exists. Implementation is pretty similar to
|
|
* ReleaseRelationBuffers() which is for destroying just one relation.
|
|
* --------------------------------------------------------------------
|
|
*/
|
|
void
|
|
DropBuffers(Oid dbid)
|
|
{
|
|
int i;
|
|
BufferDesc *buf;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
for (i = 1; i <= NBuffers; i++)
|
|
{
|
|
buf = &BufferDescriptors[i - 1];
|
|
recheck:
|
|
if (buf->tag.relId.dbId == dbid)
|
|
{
|
|
|
|
/*
|
|
* If there is I/O in progress, better wait till it's done;
|
|
* don't want to delete the database out from under someone
|
|
* who's just trying to flush the buffer!
|
|
*/
|
|
if (buf->flags & BM_IO_IN_PROGRESS)
|
|
{
|
|
WaitIO(buf, BufMgrLock);
|
|
|
|
/*
|
|
* By now, the buffer very possibly belongs to some other
|
|
* DB, so check again before proceeding.
|
|
*/
|
|
goto recheck;
|
|
}
|
|
/* Now we can do what we came for */
|
|
buf->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
|
|
ClearBufferDirtiedByMe(i, buf);
|
|
|
|
/*
|
|
* The thing should be free, if caller has checked that no
|
|
* backends are running in that database.
|
|
*/
|
|
Assert(buf->flags & BM_FREE);
|
|
/*
|
|
* And mark the buffer as no longer occupied by this page.
|
|
*/
|
|
BufTableDelete(buf);
|
|
}
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
|
|
/* -----------------------------------------------------------------
|
|
* PrintBufferDescs
|
|
*
|
|
* this function prints all the buffer descriptors, for debugging
|
|
* use only.
|
|
* -----------------------------------------------------------------
|
|
*/
|
|
void
|
|
PrintBufferDescs()
|
|
{
|
|
int i;
|
|
BufferDesc *buf = BufferDescriptors;
|
|
|
|
if (IsUnderPostmaster)
|
|
{
|
|
SpinAcquire(BufMgrLock);
|
|
for (i = 0; i < NBuffers; ++i, ++buf)
|
|
{
|
|
elog(DEBUG, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
|
|
blockNum=%d, flags=0x%x, refcount=%d %ld)",
|
|
i, buf->freeNext, buf->freePrev,
|
|
buf->blind.relname, buf->tag.blockNum, buf->flags,
|
|
buf->refcount, PrivateRefCount[i]);
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
else
|
|
{
|
|
/* interactive backend */
|
|
for (i = 0; i < NBuffers; ++i, ++buf)
|
|
{
|
|
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n",
|
|
i, buf->blind.relname, buf->tag.blockNum,
|
|
buf->flags, buf->refcount, PrivateRefCount[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
PrintPinnedBufs()
|
|
{
|
|
int i;
|
|
BufferDesc *buf = BufferDescriptors;
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
for (i = 0; i < NBuffers; ++i, ++buf)
|
|
{
|
|
if (PrivateRefCount[i] > 0)
|
|
elog(NOTICE, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
|
|
blockNum=%d, flags=0x%x, refcount=%d %ld)\n",
|
|
i, buf->freeNext, buf->freePrev, buf->blind.relname,
|
|
buf->tag.blockNum, buf->flags,
|
|
buf->refcount, PrivateRefCount[i]);
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
}
|
|
|
|
/*
|
|
* BufferPoolBlowaway
|
|
*
|
|
* this routine is solely for the purpose of experiments -- sometimes
|
|
* you may want to blowaway whatever is left from the past in buffer
|
|
* pool and start measuring some performance with a clean empty buffer
|
|
* pool.
|
|
*/
|
|
#ifdef NOT_USED
|
|
void
|
|
BufferPoolBlowaway()
|
|
{
|
|
int i;
|
|
|
|
BufferSync();
|
|
for (i = 1; i <= NBuffers; i++)
|
|
{
|
|
if (BufferIsValid(i))
|
|
{
|
|
while (BufferIsValid(i))
|
|
ReleaseBuffer(i);
|
|
}
|
|
BufTableDelete(&BufferDescriptors[i - 1]);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
/* ---------------------------------------------------------------------
|
|
* FlushRelationBuffers
|
|
*
|
|
* This function flushes all dirty pages of a relation out to disk.
|
|
* Furthermore, pages that have blocknumber >= firstDelBlock are
|
|
* actually removed from the buffer pool. An error code is returned
|
|
* if we fail to dump a dirty buffer or if we find one of
|
|
* the target pages is pinned into the cache.
|
|
*
|
|
* This is used by VACUUM before truncating the relation to the given
|
|
* number of blocks. (TRUNCATE TABLE also uses it in the same way.)
|
|
* It might seem unnecessary to flush dirty pages before firstDelBlock,
|
|
* since VACUUM should already have committed its changes. However,
|
|
* it is possible for there still to be dirty pages: if some page
|
|
* had unwritten on-row tuple status updates from a prior transaction,
|
|
* and VACUUM had no additional changes to make to that page, then
|
|
* VACUUM won't have written it. This is harmless in most cases but
|
|
* will break pg_upgrade, which relies on VACUUM to ensure that *all*
|
|
* tuples have correct on-row status. So, we check and flush all
|
|
* dirty pages of the rel regardless of block number.
|
|
*
|
|
* This is also used by RENAME TABLE (with firstDelBlock = 0)
|
|
* to clear out the buffer cache before renaming the physical files of
|
|
* a relation. Without that, some other backend might try to do a
|
|
* blind write of a buffer page (relying on the BlindId of the buffer)
|
|
* and fail because it's not got the right filename anymore.
|
|
*
|
|
* In all cases, the caller should be holding AccessExclusiveLock on
|
|
* the target relation to ensure that no other backend is busy reading
|
|
* more blocks of the relation.
|
|
*
|
|
* Formerly, we considered it an error condition if we found dirty
|
|
* buffers here. However, since BufferSync no longer forces out all
|
|
* dirty buffers at every xact commit, it's possible for dirty buffers
|
|
* to still be present in the cache due to failure of an earlier
|
|
* transaction. So, must flush dirty buffers without complaint.
|
|
*
|
|
* Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
|
|
*
|
|
* XXX currently it sequentially searches the buffer pool, should be
|
|
* changed to more clever ways of searching.
|
|
* --------------------------------------------------------------------
|
|
*/
|
|
int
|
|
FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
|
|
{
|
|
Oid relid = RelationGetRelid(rel);
|
|
int i;
|
|
BufferDesc *buf;
|
|
|
|
if (rel->rd_myxactonly)
|
|
{
|
|
for (i = 0; i < NLocBuffer; i++)
|
|
{
|
|
buf = &LocalBufferDescriptors[i];
|
|
if (buf->tag.relId.relId == relid)
|
|
{
|
|
if (buf->flags & BM_DIRTY)
|
|
{
|
|
if (FlushBuffer(-i - 1, false) != STATUS_OK)
|
|
{
|
|
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
|
|
RelationGetRelationName(rel), firstDelBlock,
|
|
buf->tag.blockNum);
|
|
return -1;
|
|
}
|
|
}
|
|
if (LocalRefCount[i] > 0)
|
|
{
|
|
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
|
|
RelationGetRelationName(rel), firstDelBlock,
|
|
buf->tag.blockNum, LocalRefCount[i]);
|
|
return -2;
|
|
}
|
|
if (buf->tag.blockNum >= firstDelBlock)
|
|
{
|
|
buf->tag.relId.relId = InvalidOid;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
SpinAcquire(BufMgrLock);
|
|
for (i = 0; i < NBuffers; i++)
|
|
{
|
|
buf = &BufferDescriptors[i];
|
|
recheck:
|
|
if (buf->tag.relId.relId == relid &&
|
|
(buf->tag.relId.dbId == MyDatabaseId ||
|
|
buf->tag.relId.dbId == (Oid) NULL))
|
|
{
|
|
if (buf->flags & BM_DIRTY)
|
|
{
|
|
PinBuffer(buf);
|
|
SpinRelease(BufMgrLock);
|
|
if (FlushBuffer(i + 1, true) != STATUS_OK)
|
|
{
|
|
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it",
|
|
RelationGetRelationName(rel), firstDelBlock,
|
|
buf->tag.blockNum,
|
|
PrivateRefCount[i], buf->refcount);
|
|
return -1;
|
|
}
|
|
SpinAcquire(BufMgrLock);
|
|
|
|
/*
|
|
* Buffer could already be reassigned, so must recheck
|
|
* whether it still belongs to rel before freeing it!
|
|
*/
|
|
goto recheck;
|
|
}
|
|
if (!(buf->flags & BM_FREE))
|
|
{
|
|
SpinRelease(BufMgrLock);
|
|
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
|
|
RelationGetRelationName(rel), firstDelBlock,
|
|
buf->tag.blockNum,
|
|
PrivateRefCount[i], buf->refcount);
|
|
return -2;
|
|
}
|
|
if (buf->tag.blockNum >= firstDelBlock)
|
|
{
|
|
BufTableDelete(buf);
|
|
}
|
|
}
|
|
}
|
|
SpinRelease(BufMgrLock);
|
|
return 0;
|
|
}
|
|
|
|
#undef ReleaseBuffer

/*
 * ReleaseBuffer -- remove the pin on a buffer without
 *		marking it dirty.
 *
 */
int
ReleaseBuffer(Buffer buffer)
{
	BufferDesc *bufHdr;

	if (BufferIsLocal(buffer))
	{
		Assert(LocalRefCount[-buffer - 1] > 0);
		LocalRefCount[-buffer - 1]--;
		return STATUS_OK;
	}

	if (BAD_BUFFER_ID(buffer))
		return STATUS_ERROR;

	bufHdr = &BufferDescriptors[buffer - 1];

	Assert(PrivateRefCount[buffer - 1] > 0);
	PrivateRefCount[buffer - 1]--;
	if (PrivateRefCount[buffer - 1] == 0)
	{
		SpinAcquire(BufMgrLock);
		Assert(bufHdr->refcount > 0);
		bufHdr->refcount--;
		if (bufHdr->refcount == 0)
		{
			AddBufferToFreelist(bufHdr);
			bufHdr->flags |= BM_FREE;
		}
		SpinRelease(BufMgrLock);
	}

	return STATUS_OK;
}

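/*
 * Illustrative sketch (not part of the original source): the usual pin
 * discipline around ReleaseBuffer.  A page obtained with ReadBuffer()
 * stays pinned until the caller drops the pin, either with
 * ReleaseBuffer() (read-only access) or WriteBuffer() (page modified).
 * The helper name below is hypothetical.
 */
#ifdef NOT_USED
static void
example_touch_page(Relation rel, BlockNumber blkno, bool modified)
{
	Buffer		buffer = ReadBuffer(rel, blkno);

	if (!BufferIsValid(buffer))
		elog(ERROR, "example_touch_page: ReadBuffer failed");

	/* ... examine or update the page contents here ... */

	if (modified)
		WriteBuffer(buffer);	/* marks the buffer dirty and drops the pin */
	else
		ReleaseBuffer(buffer);	/* just drops the pin */
}

#endif
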
#ifdef NOT_USED
void
IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
{
	IncrBufferRefCount(buffer);
	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
	{
		BufferDesc *buf = &BufferDescriptors[buffer - 1];

		fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
				buffer, buf->blind.relname, buf->tag.blockNum,
				PrivateRefCount[buffer - 1], file, line);
	}
}

#endif

#ifdef NOT_USED
void
ReleaseBuffer_Debug(char *file, int line, Buffer buffer)
{
	ReleaseBuffer(buffer);
	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
	{
		BufferDesc *buf = &BufferDescriptors[buffer - 1];

		fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
				buffer, buf->blind.relname, buf->tag.blockNum,
				PrivateRefCount[buffer - 1], file, line);
	}
}

#endif

#ifdef NOT_USED
int
ReleaseAndReadBuffer_Debug(char *file,
						   int line,
						   Buffer buffer,
						   Relation relation,
						   BlockNumber blockNum)
{
	bool		bufferValid;
	Buffer		b;

	bufferValid = BufferIsValid(buffer);
	b = ReleaseAndReadBuffer(buffer, relation, blockNum);
	if (ShowPinTrace && bufferValid && BufferIsLocal(buffer)
		&& is_userbuffer(buffer))
	{
		BufferDesc *buf = &BufferDescriptors[buffer - 1];

		fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
				buffer, buf->blind.relname, buf->tag.blockNum,
				PrivateRefCount[buffer - 1], file, line);
	}
	if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
	{
		BufferDesc *buf = &BufferDescriptors[b - 1];

		fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
				b, buf->blind.relname, buf->tag.blockNum,
				PrivateRefCount[b - 1], file, line);
	}
	return b;
}

#endif

#ifdef BMTRACE

/*
 *	trace allocations and deallocations in a circular buffer in
 *	shared memory.  check the buffer before doing the allocation,
 *	and die if there's anything fishy.
 */

_bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType)
{
	long		start,
				cur;
	bmtrace    *tb;

	start = *CurTraceBuf;

	if (start > 0)
		cur = start - 1;
	else
		cur = BMT_LIMIT - 1;

	for (;;)
	{
		tb = &TraceBuf[cur];
		if (tb->bmt_op != BMT_NOTUSED)
		{
			if (tb->bmt_buf == bufNo)
			{
				if ((tb->bmt_op == BMT_DEALLOC)
					|| (tb->bmt_dbid == dbId && tb->bmt_relid == relId
						&& tb->bmt_blkno == blkNo))
					goto okay;

				/* die holding the buffer lock */
				_bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
			}
		}

		if (cur == start)
			goto okay;

		if (cur == 0)
			cur = BMT_LIMIT - 1;
		else
			cur--;
	}

okay:
	tb = &TraceBuf[start];
	tb->bmt_pid = MyProcPid;
	tb->bmt_buf = bufNo;
	tb->bmt_dbid = dbId;
	tb->bmt_relid = relId;
	tb->bmt_blkno = blkNo;
	tb->bmt_op = allocType;

	*CurTraceBuf = (start + 1) % BMT_LIMIT;
}

_bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
		int allocType, long start, long cur)
{
	FILE	   *fp;
	bmtrace    *tb;
	int			i;

	tb = &TraceBuf[cur];

	if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL)
		elog(FATAL, "buffer alloc trace error and can't open log file");

	fprintf(fp, "buffer alloc trace detected the following error:\n\n");
	fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n",
			bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
			(tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));

	fprintf(fp, "the trace buffer contains:\n");

	i = start;
	for (;;)
	{
		tb = &TraceBuf[i];
		if (tb->bmt_op != BMT_NOTUSED)
		{
			fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%u,%d> ",
					i, (i == cur ? " ---> " : "\t"),
					tb->bmt_pid, tb->bmt_buf,
					tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);

			switch (tb->bmt_op)
			{
				case BMT_ALLOCFND:
					fprintf(fp, "allocate (found)\n");
					break;

				case BMT_ALLOCNOTFND:
					fprintf(fp, "allocate (not found)\n");
					break;

				case BMT_DEALLOC:
					fprintf(fp, "deallocate\n");
					break;

				default:
					fprintf(fp, "unknown op type %d\n", tb->bmt_op);
					break;
			}
		}

		i = (i + 1) % BMT_LIMIT;
		if (i == start)
			break;
	}

	fprintf(fp, "\noperation causing error:\n");
	fprintf(fp, "\tpid %d buf %d for <%d,%u,%d> ",
			getpid(), bufNo, dbId, relId, blkNo);

	switch (allocType)
	{
		case BMT_ALLOCFND:
			fprintf(fp, "allocate (found)\n");
			break;

		case BMT_ALLOCNOTFND:
			fprintf(fp, "allocate (not found)\n");
			break;

		case BMT_DEALLOC:
			fprintf(fp, "deallocate\n");
			break;

		default:
			fprintf(fp, "unknown op type %d\n", allocType);
			break;
	}

	FreeFile(fp);

	kill(getpid(), SIGILL);
}

#endif	 /* BMTRACE */

/*
 * SetBufferCommitInfoNeedsSave
 *
 *	Mark a buffer dirty when we have updated tuple commit-status bits in it.
 *
 * This is similar to WriteNoReleaseBuffer, except that we do not set
 * SharedBufferChanged or BufferDirtiedByMe, because we have not made a
 * critical change that has to be flushed to disk before xact commit --- the
 * status-bit update could be redone by someone else just as easily.  The
 * buffer will be marked dirty, but it will not be written to disk until
 * there is another reason to write it.
 *
 * This routine might get called many times on the same page, if we are making
 * the first scan after commit of an xact that added/deleted many tuples.
 * So, be as quick as we can if the buffer is already dirty.
 */
void
SetBufferCommitInfoNeedsSave(Buffer buffer)
{
	BufferDesc *bufHdr;

	if (BufferIsLocal(buffer))
		return;

	if (BAD_BUFFER_ID(buffer))
		return;

	bufHdr = &BufferDescriptors[buffer - 1];

	if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
		(BM_DIRTY | BM_JUST_DIRTIED))
	{
		SpinAcquire(BufMgrLock);
		Assert(bufHdr->refcount > 0);
		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
		SpinRelease(BufMgrLock);
	}
}

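/*
 * Illustrative sketch (not part of the original source): the intended
 * call pattern for SetBufferCommitInfoNeedsSave().  After updating a
 * tuple's commit-status hint bits in place, the caller marks the buffer
 * dirty this way rather than with WriteNoReleaseBuffer, since losing
 * the update is harmless.  The helper name is hypothetical and the
 * infomask bit is only an example.
 */
#ifdef NOT_USED
static void
example_set_hint_bit(Buffer buffer, HeapTupleHeader tuple)
{
	tuple->t_infomask |= HEAP_XMIN_COMMITTED;	/* example hint bit */
	SetBufferCommitInfoNeedsSave(buffer);
}

#endif
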
void
UnlockBuffers()
{
	BufferDesc *buf;
	int			i;

	for (i = 0; i < NBuffers; i++)
	{
		if (BufferLocks[i] == 0)
			continue;

		Assert(BufferIsValid(i + 1));
		buf = &(BufferDescriptors[i]);

#ifdef HAS_TEST_AND_SET
		S_LOCK(&(buf->cntx_lock));
#else
		IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif

		if (BufferLocks[i] & BL_R_LOCK)
		{
			Assert(buf->r_locks > 0);
			(buf->r_locks)--;
		}
		if (BufferLocks[i] & BL_RI_LOCK)
		{

			/*
			 * Someone else could remove our RI lock when acquiring W
			 * lock. This is possible if we came here from elog(ERROR)
			 * from IpcSemaphore{Lock|Unlock}(WaitCLSemId). And so we
			 * don't do Assert(buf->ri_lock) here.
			 */
			buf->ri_lock = false;
		}
		if (BufferLocks[i] & BL_W_LOCK)
		{
			Assert(buf->w_lock);
			buf->w_lock = false;
		}
#ifdef HAS_TEST_AND_SET
		S_UNLOCK(&(buf->cntx_lock));
#else
		IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
		BufferLocks[i] = 0;
	}
}

void
LockBuffer(Buffer buffer, int mode)
{
	BufferDesc *buf;
	bits8	   *buflock;

	Assert(BufferIsValid(buffer));
	if (BufferIsLocal(buffer))
		return;

	buf = &(BufferDescriptors[buffer - 1]);
	buflock = &(BufferLocks[buffer - 1]);

#ifdef HAS_TEST_AND_SET
	S_LOCK(&(buf->cntx_lock));
#else
	IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif

	if (mode == BUFFER_LOCK_UNLOCK)
	{
		if (*buflock & BL_R_LOCK)
		{
			Assert(buf->r_locks > 0);
			Assert(!(buf->w_lock));
			Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK)));
			(buf->r_locks)--;
			*buflock &= ~BL_R_LOCK;
		}
		else if (*buflock & BL_W_LOCK)
		{
			Assert(buf->w_lock);
			Assert(buf->r_locks == 0);
			Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK)));
			buf->w_lock = false;
			*buflock &= ~BL_W_LOCK;
		}
		else
			elog(ERROR, "UNLockBuffer: buffer %lu is not locked", buffer);
	}
	else if (mode == BUFFER_LOCK_SHARE)
	{
		unsigned	i = 0;

		Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
		while (buf->ri_lock || buf->w_lock)
		{
#ifdef HAS_TEST_AND_SET
			S_UNLOCK(&(buf->cntx_lock));
			s_lock_sleep(i++);
			S_LOCK(&(buf->cntx_lock));
#else
			IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
			s_lock_sleep(i++);
			IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
		}
		(buf->r_locks)++;
		*buflock |= BL_R_LOCK;
	}
	else if (mode == BUFFER_LOCK_EXCLUSIVE)
	{
		unsigned	i = 0;

		Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
		while (buf->r_locks > 0 || buf->w_lock)
		{
			if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK))
			{

				/*
				 * Our RI lock might be removed by a concurrent W-lock
				 * acquisition (see what we do with RI locks below once
				 * our own W-lock acquisition succeeds), so we set the
				 * RI lock again if we had already set it.
				 */
				*buflock |= BL_RI_LOCK;
				buf->ri_lock = true;
			}
#ifdef HAS_TEST_AND_SET
			S_UNLOCK(&(buf->cntx_lock));
			s_lock_sleep(i++);
			S_LOCK(&(buf->cntx_lock));
#else
			IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
			s_lock_sleep(i++);
			IpcSemaphoreLock(WaitCLSemId, 0, IpcExclusiveLock);
#endif
		}
		buf->w_lock = true;
		*buflock |= BL_W_LOCK;
		if (*buflock & BL_RI_LOCK)
		{

			/*
			 * It's possible that we remove RI locks acquired by other W
			 * lockers here, but they'll take care of it themselves.
			 */
			buf->ri_lock = false;
			*buflock &= ~BL_RI_LOCK;
		}
	}
	else
		elog(ERROR, "LockBuffer: unknown lock mode %d", mode);

#ifdef HAS_TEST_AND_SET
	S_UNLOCK(&(buf->cntx_lock));
#else
	IpcSemaphoreUnlock(WaitCLSemId, 0, IpcExclusiveLock);
#endif

}

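/*
 * Illustrative sketch (not part of the original source): typical use of
 * the content-lock modes above.  A pinned buffer is share-locked for
 * reading or exclusive-locked for modification, and is always unlocked
 * with BUFFER_LOCK_UNLOCK before the pin is dropped.  The helper name
 * is hypothetical.
 */
#ifdef NOT_USED
static void
example_scan_page(Relation rel, BlockNumber blkno)
{
	Buffer		buffer = ReadBuffer(rel, blkno);	/* pins the page */

	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	/* ... read tuples on the page ... */
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	ReleaseBuffer(buffer);		/* drop the pin */
}

#endif
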
/*
 * Functions for IO error handling
 *
 * Note: We assume that nested buffer IO never occurs,
 * i.e. at most one io_in_progress spinlock is held
 * per proc.
 */
static BufferDesc *InProgressBuf = (BufferDesc *) NULL;
static bool IsForInput;

/*
 * Function:StartBufferIO
 *	(Assumptions)
 *	My process is executing no IO
 *	BufMgrLock is held
 *	BM_IO_IN_PROGRESS mask is not set for the buffer
 *	The buffer is Pinned
 *
 */
static void
StartBufferIO(BufferDesc *buf, bool forInput)
{
	Assert(!InProgressBuf);
	Assert(!(buf->flags & BM_IO_IN_PROGRESS));
	buf->flags |= BM_IO_IN_PROGRESS;
#ifdef HAS_TEST_AND_SET

	/*
	 * There used to be
	 *
	 * Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
	 *
	 * here, but that's wrong because of the way WaitIO works: someone else
	 * waiting for the I/O to complete will succeed in grabbing the lock
	 * for a few instructions, and if we context-swap back to here the
	 * Assert could fail.  Tiny window for failure, but I've seen it
	 * happen -- tgl
	 */
	S_LOCK(&(buf->io_in_progress_lock));
#endif	 /* HAS_TEST_AND_SET */
	InProgressBuf = buf;
	IsForInput = forInput;
}

/*
 * Function:TerminateBufferIO
 *	(Assumptions)
 *	My process is executing IO for the buffer
 *	BufMgrLock is held
 *	The buffer is Pinned
 *
 */
static void
TerminateBufferIO(BufferDesc *buf)
{
	Assert(buf == InProgressBuf);
#ifdef HAS_TEST_AND_SET
	S_UNLOCK(&(buf->io_in_progress_lock));
#else
	if (buf->refcount > 1)
		SignalIO(buf);
#endif	 /* HAS_TEST_AND_SET */
	InProgressBuf = (BufferDesc *) 0;
}

/*
 * Function:ContinueBufferIO
 *	(Assumptions)
 *	My process is executing IO for the buffer
 *	BufMgrLock is held
 *	The buffer is Pinned
 *
 */
static void
ContinueBufferIO(BufferDesc *buf, bool forInput)
{
	Assert(buf == InProgressBuf);
	Assert(buf->flags & BM_IO_IN_PROGRESS);
	IsForInput = forInput;
}

#ifdef NOT_USED
void
InitBufferIO(void)
{
	InProgressBuf = (BufferDesc *) 0;
}

#endif

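/*
 * Illustrative sketch (not part of the original source): the I/O
 * protocol implemented by the routines above, as used for a buffer
 * read.  The caller is assumed to hold BufMgrLock with the buffer
 * pinned; the lock is dropped around the actual smgr call.  WaitIO
 * handling for other backends and error checking are omitted, and the
 * helper name is hypothetical.
 */
#ifdef NOT_USED
static void
example_read_protocol(Relation reln, BufferDesc *bufHdr)
{
	/* BufMgrLock is assumed held and bufHdr pinned here */
	StartBufferIO(bufHdr, true);	/* sets BM_IO_IN_PROGRESS */
	SpinRelease(BufMgrLock);

	smgrread(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
			 (char *) MAKE_PTR(bufHdr->data));

	SpinAcquire(BufMgrLock);
	bufHdr->flags &= ~BM_IO_IN_PROGRESS;
	TerminateBufferIO(bufHdr);		/* wakes anyone waiting on the IO */
}

#endif
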
/*
 * This function is called from ProcReleaseSpins().
 * BufMgrLock isn't held when this function is called.
 * BM_IO_ERROR is always set.  If BM_IO_ERROR was already
 * set in case of output, this routine would kill all
 * backends and reset postmaster.
 */
void
AbortBufferIO(void)
{
	BufferDesc *buf = InProgressBuf;

	if (buf)
	{
		Assert(buf->flags & BM_IO_IN_PROGRESS);
		SpinAcquire(BufMgrLock);
		if (IsForInput)
			Assert(!(buf->flags & BM_DIRTY));
		else
		{
			Assert((buf->flags & BM_DIRTY) != 0);
			if ((buf->flags & BM_IO_ERROR) != 0)
			{
				elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s",
					 buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
			}
			buf->flags |= BM_DIRTY;
		}
		buf->flags |= BM_IO_ERROR;
		buf->flags &= ~BM_IO_IN_PROGRESS;
		TerminateBufferIO(buf);
		SpinRelease(BufMgrLock);
	}
}