pgindent run for 8.3.
src/backend/storage/buffer/bufmgr.c

@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.226 2007/09/25 22:11:48 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.227 2007/11/15 21:14:37 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -77,8 +77,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;


 static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
-bool zeroPage,
-BufferAccessStrategy strategy);
+bool zeroPage,
+BufferAccessStrategy strategy);
 static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
@@ -90,8 +90,8 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
 int set_flag_bits);
 static void buffer_write_error_callback(void *arg);
 static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
-BufferAccessStrategy strategy,
-bool *foundPtr);
+BufferAccessStrategy strategy,
+bool *foundPtr);
 static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);

@@ -215,10 +215,10 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage,
 * This can happen because mdread doesn't complain about reads beyond
 * EOF (when zero_damaged_pages is ON) and so a previous attempt to
 * read a block beyond EOF could have left a "valid" zero-filled
-* buffer. Unfortunately, we have also seen this case occurring
+* buffer. Unfortunately, we have also seen this case occurring
 * because of buggy Linux kernels that sometimes return an
-* lseek(SEEK_END) result that doesn't account for a recent write.
-* In that situation, the pre-existing buffer would contain valid data
+* lseek(SEEK_END) result that doesn't account for a recent write. In
+* that situation, the pre-existing buffer would contain valid data
 * that we don't want to overwrite. Since the legitimate case should
 * always have left a zero-filled buffer, complain if not PageIsNew.
 */
@@ -283,9 +283,9 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage,
 }
 else
 {
-/*
-* Read in the page, unless the caller intends to overwrite it
-* and just wants us to allocate a buffer.
+/*
+* Read in the page, unless the caller intends to overwrite it and
+* just wants us to allocate a buffer.
 */
 if (zeroPage)
 MemSet((char *) bufBlock, 0, BLCKSZ);
@@ -420,7 +420,7 @@ BufferAlloc(Relation reln,
 /* Loop here in case we have to try another victim buffer */
 for (;;)
 {
-bool lock_held;
+bool lock_held;

 /*
 * Select a victim buffer. The buffer is returned with its header
@@ -472,7 +472,7 @@ BufferAlloc(Relation reln,
 * If using a nondefault strategy, and writing the buffer
 * would require a WAL flush, let the strategy decide whether
 * to go ahead and write/reuse the buffer or to choose another
-* victim. We need lock to inspect the page LSN, so this
+* victim. We need lock to inspect the page LSN, so this
 * can't be done inside StrategyGetBuffer.
 */
 if (strategy != NULL &&
@@ -630,8 +630,8 @@ BufferAlloc(Relation reln,
 *
 * Clearing BM_VALID here is necessary, clearing the dirtybits is just
 * paranoia. We also reset the usage_count since any recency of use of
-* the old content is no longer relevant. (The usage_count starts out
-* at 1 so that the buffer can survive one clock-sweep pass.)
+* the old content is no longer relevant. (The usage_count starts out at
+* 1 so that the buffer can survive one clock-sweep pass.)
 */
 buf->tag = newTag;
 buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
@@ -865,7 +865,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 * when we first pin it; for other strategies we just make sure the usage_count
 * isn't zero. (The idea of the latter is that we don't want synchronized
 * heap scans to inflate the count, but we need it to not be zero to discourage
-* other backends from stealing buffers from our ring. As long as we cycle
+* other backends from stealing buffers from our ring. As long as we cycle
 * through the ring faster than the global clock-sweep cycles, buffers in
 * our ring won't be chosen as victims for replacement by other backends.)
 *
@@ -1016,9 +1016,8 @@ BufferSync(int flags)
 * have the flag set.
 *
 * Note that if we fail to write some buffer, we may leave buffers with
-* BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer
-* would certainly need to be written for the next checkpoint attempt,
-* too.
+* BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
+* certainly need to be written for the next checkpoint attempt, too.
 */
 num_to_write = 0;
 for (buf_id = 0; buf_id < NBuffers; buf_id++)
@@ -1045,11 +1044,11 @@ BufferSync(int flags)

 /*
 * Loop over all buffers again, and write the ones (still) marked with
-* BM_CHECKPOINT_NEEDED. In this loop, we start at the clock sweep
-* point since we might as well dump soon-to-be-recycled buffers first.
+* BM_CHECKPOINT_NEEDED. In this loop, we start at the clock sweep point
+* since we might as well dump soon-to-be-recycled buffers first.
 *
-* Note that we don't read the buffer alloc count here --- that should
-* be left untouched till the next BgBufferSync() call.
+* Note that we don't read the buffer alloc count here --- that should be
+* left untouched till the next BgBufferSync() call.
 */
 buf_id = StrategySyncStart(NULL, NULL);
 num_to_scan = NBuffers;
@@ -1067,8 +1066,8 @@ BufferSync(int flags)
 * examine the bit here and the time SyncOneBuffer acquires lock,
 * someone else not only wrote the buffer but replaced it with another
 * page and dirtied it. In that improbable case, SyncOneBuffer will
-* write the buffer though we didn't need to. It doesn't seem
-* worth guarding against this, though.
+* write the buffer though we didn't need to. It doesn't seem worth
+* guarding against this, though.
 */
 if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
 {
@@ -1092,8 +1091,8 @@ BufferSync(int flags)
 break;

 /*
-* Perform normal bgwriter duties and sleep to throttle
-* our I/O rate.
+* Perform normal bgwriter duties and sleep to throttle our
+* I/O rate.
 */
 CheckpointWriteDelay(flags,
 (double) num_written / num_to_write);
@@ -1105,8 +1104,8 @@ BufferSync(int flags)
 }

 /*
-* Update checkpoint statistics. As noted above, this doesn't
-* include buffers written by other backends or bgwriter scan.
+* Update checkpoint statistics. As noted above, this doesn't include
+* buffers written by other backends or bgwriter scan.
 */
 CheckpointStats.ckpt_bufs_written += num_written;
 }
@@ -1128,7 +1127,7 @@ BgBufferSync(void)
 * Information saved between calls so we can determine the strategy
 * point's advance rate and avoid scanning already-cleaned buffers.
 */
-static bool saved_info_valid = false;
+static bool saved_info_valid = false;
 static int prev_strategy_buf_id;
 static uint32 prev_strategy_passes;
 static int next_to_clean;
@@ -1157,8 +1156,8 @@ BgBufferSync(void)
 int reusable_buffers;

 /*
-* Find out where the freelist clock sweep currently is, and how
-* many buffer allocations have happened since our last call.
+* Find out where the freelist clock sweep currently is, and how many
+* buffer allocations have happened since our last call.
 */
 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

@@ -1166,9 +1165,9 @@ BgBufferSync(void)
 BgWriterStats.m_buf_alloc += recent_alloc;

 /*
-* If we're not running the LRU scan, just stop after doing the
-* stats stuff. We mark the saved state invalid so that we can recover
-* sanely if LRU scan is turned back on later.
+* If we're not running the LRU scan, just stop after doing the stats
+* stuff. We mark the saved state invalid so that we can recover sanely
+* if LRU scan is turned back on later.
 */
 if (bgwriter_lru_maxpages <= 0)
 {
@@ -1178,18 +1177,19 @@ BgBufferSync(void)

 /*
 * Compute strategy_delta = how many buffers have been scanned by the
-* clock sweep since last time. If first time through, assume none.
-* Then see if we are still ahead of the clock sweep, and if so, how many
-* buffers we could scan before we'd catch up with it and "lap" it.
-* Note: weird-looking coding of xxx_passes comparisons are to avoid
-* bogus behavior when the passes counts wrap around.
+* clock sweep since last time. If first time through, assume none. Then
+* see if we are still ahead of the clock sweep, and if so, how many
+* buffers we could scan before we'd catch up with it and "lap" it. Note:
+* weird-looking coding of xxx_passes comparisons are to avoid bogus
+* behavior when the passes counts wrap around.
 */
 if (saved_info_valid)
 {
-int32 passes_delta = strategy_passes - prev_strategy_passes;
+int32 passes_delta = strategy_passes - prev_strategy_passes;

 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
-strategy_delta += (long) passes_delta * NBuffers;
+strategy_delta += (long) passes_delta *NBuffers;

 Assert(strategy_delta >= 0);

 if ((int32) (next_passes - strategy_passes) > 0)
@@ -1218,8 +1218,8 @@ BgBufferSync(void)
 else
 {
 /*
-* We're behind, so skip forward to the strategy point
-* and start cleaning from there.
+* We're behind, so skip forward to the strategy point and start
+* cleaning from there.
 */
 #ifdef BGW_DEBUG
 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
@@ -1235,8 +1235,8 @@ BgBufferSync(void)
 else
 {
 /*
-* Initializing at startup or after LRU scanning had been off.
-* Always start at the strategy point.
+* Initializing at startup or after LRU scanning had been off. Always
+* start at the strategy point.
 */
 #ifdef BGW_DEBUG
 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
@@ -1254,8 +1254,8 @@ BgBufferSync(void)
 saved_info_valid = true;

 /*
-* Compute how many buffers had to be scanned for each new allocation,
-* ie, 1/density of reusable buffers, and track a moving average of that.
+* Compute how many buffers had to be scanned for each new allocation, ie,
+* 1/density of reusable buffers, and track a moving average of that.
 *
 * If the strategy point didn't move, we don't update the density estimate
 */
@@ -1268,16 +1268,16 @@ BgBufferSync(void)

 /*
 * Estimate how many reusable buffers there are between the current
-* strategy point and where we've scanned ahead to, based on the
-* smoothed density estimate.
+* strategy point and where we've scanned ahead to, based on the smoothed
+* density estimate.
 */
 bufs_ahead = NBuffers - bufs_to_lap;
 reusable_buffers_est = (float) bufs_ahead / smoothed_density;

 /*
-* Track a moving average of recent buffer allocations. Here, rather
-* than a true average we want a fast-attack, slow-decline behavior:
-* we immediately follow any increase.
+* Track a moving average of recent buffer allocations. Here, rather than
+* a true average we want a fast-attack, slow-decline behavior: we
+* immediately follow any increase.
 */
 if (smoothed_alloc <= (float) recent_alloc)
 smoothed_alloc = recent_alloc;
@@ -1291,12 +1291,12 @@ BgBufferSync(void)
 /*
 * Even in cases where there's been little or no buffer allocation
 * activity, we want to make a small amount of progress through the buffer
-* cache so that as many reusable buffers as possible are clean
-* after an idle period.
+* cache so that as many reusable buffers as possible are clean after an
+* idle period.
 *
-* (scan_whole_pool_milliseconds / BgWriterDelay) computes how many
-* times the BGW will be called during the scan_whole_pool time;
-* slice the buffer pool into that many sections.
+* (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
+* the BGW will be called during the scan_whole_pool time; slice the
+* buffer pool into that many sections.
 */
 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

@@ -1311,9 +1311,9 @@ BgBufferSync(void)

 /*
 * Now write out dirty reusable buffers, working forward from the
-* next_to_clean point, until we have lapped the strategy scan, or
-* cleaned enough buffers to match our estimate of the next cycle's
-* allocation requirements, or hit the bgwriter_lru_maxpages limit.
+* next_to_clean point, until we have lapped the strategy scan, or cleaned
+* enough buffers to match our estimate of the next cycle's allocation
+* requirements, or hit the bgwriter_lru_maxpages limit.
 */

 /* Make sure we can handle the pin inside SyncOneBuffer */
@@ -1326,7 +1326,7 @@ BgBufferSync(void)
 /* Execute the LRU scan */
 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
 {
-int buffer_state = SyncOneBuffer(next_to_clean, true);
+int buffer_state = SyncOneBuffer(next_to_clean, true);

 if (++next_to_clean >= NBuffers)
 {
@@ -1361,11 +1361,11 @@ BgBufferSync(void)

 /*
 * Consider the above scan as being like a new allocation scan.
-* Characterize its density and update the smoothed one based on it.
-* This effectively halves the moving average period in cases where
-* both the strategy and the background writer are doing some useful
-* scanning, which is helpful because a long memory isn't as desirable
-* on the density estimates.
+* Characterize its density and update the smoothed one based on it. This
+* effectively halves the moving average period in cases where both the
+* strategy and the background writer are doing some useful scanning,
+* which is helpful because a long memory isn't as desirable on the
+* density estimates.
 */
 strategy_delta = bufs_to_lap - num_to_scan;
 recent_alloc = reusable_buffers - reusable_buffers_est;
@@ -1402,7 +1402,7 @@ static int
 SyncOneBuffer(int buf_id, bool skip_recently_used)
 {
 volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
-int result = 0;
+int result = 0;

 /*
 * Check whether buffer needs writing.
@@ -2312,7 +2312,7 @@ LockBufferForCleanup(Buffer buffer)
 *
 * We won't loop, but just check once to see if the pin count is OK. If
 * not, return FALSE with no lock held.
-*/
+*/
 bool
 ConditionalLockBufferForCleanup(Buffer buffer)
 {

src/backend/storage/buffer/freelist.c

@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.61 2007/09/25 20:03:38 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.62 2007/11/15 21:14:37 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -36,10 +36,10 @@ typedef struct
 */

 /*
-* Statistics. These counters should be wide enough that they can't
+* Statistics. These counters should be wide enough that they can't
 * overflow during a single bgwriter cycle.
 */
-uint32 completePasses; /* Complete cycles of the clock sweep */
+uint32 completePasses; /* Complete cycles of the clock sweep */
 uint32 numBufferAllocs; /* Buffers allocated since last reset */
 } BufferStrategyControl;

@@ -57,31 +57,33 @@ typedef struct BufferAccessStrategyData
 BufferAccessStrategyType btype;
 /* Number of elements in buffers[] array */
 int ring_size;

 /*
 * Index of the "current" slot in the ring, ie, the one most recently
 * returned by GetBufferFromRing.
 */
 int current;

 /*
-* True if the buffer just returned by StrategyGetBuffer had been in
-* the ring already.
+* True if the buffer just returned by StrategyGetBuffer had been in the
+* ring already.
 */
 bool current_was_in_ring;

 /*
-* Array of buffer numbers. InvalidBuffer (that is, zero) indicates
-* we have not yet selected a buffer for this ring slot. For allocation
+* Array of buffer numbers. InvalidBuffer (that is, zero) indicates we
+* have not yet selected a buffer for this ring slot. For allocation
 * simplicity this is palloc'd together with the fixed fields of the
 * struct.
 */
-Buffer buffers[1]; /* VARIABLE SIZE ARRAY */
-} BufferAccessStrategyData;
+Buffer buffers[1]; /* VARIABLE SIZE ARRAY */
+} BufferAccessStrategyData;


 /* Prototypes for internal functions */
 static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
 static void AddBufferToRing(BufferAccessStrategy strategy,
-volatile BufferDesc *buf);
+volatile BufferDesc *buf);


 /*
@@ -108,8 +110,8 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 int trycounter;

 /*
-* If given a strategy object, see whether it can select a buffer.
-* We assume strategy objects don't need the BufFreelistLock.
+* If given a strategy object, see whether it can select a buffer. We
+* assume strategy objects don't need the BufFreelistLock.
 */
 if (strategy != NULL)
 {
@@ -127,7 +129,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)

 /*
 * We count buffer allocation requests so that the bgwriter can estimate
-* the rate of buffer consumption. Note that buffers recycled by a
+* the rate of buffer consumption. Note that buffers recycled by a
 * strategy object are intentionally not counted here.
 */
 StrategyControl->numBufferAllocs++;
@@ -151,8 +153,8 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 * If the buffer is pinned or has a nonzero usage_count, we cannot use
 * it; discard it and retry. (This can only happen if VACUUM put a
 * valid buffer in the freelist and then someone else used it before
-* we got to it. It's probably impossible altogether as of 8.3,
-* but we'd better check anyway.)
+* we got to it. It's probably impossible altogether as of 8.3, but
+* we'd better check anyway.)
 */
 LockBufHdr(buf);
 if (buf->refcount == 0 && buf->usage_count == 0)
@@ -246,7 +248,7 @@ StrategyFreeBuffer(volatile BufferDesc *buf)
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
-* allocs if non-NULL pointers are passed. The alloc count is reset after
+* allocs if non-NULL pointers are passed. The alloc count is reset after
 * being read.
 */
 int
@@ -363,12 +365,12 @@ BufferAccessStrategy
 GetAccessStrategy(BufferAccessStrategyType btype)
 {
 BufferAccessStrategy strategy;
-int ring_size;
+int ring_size;

 /*
-* Select ring size to use. See buffer/README for rationales.
-* (Currently all cases are the same size, but keep this code
-* structure for flexibility.)
+* Select ring size to use. See buffer/README for rationales. (Currently
+* all cases are the same size, but keep this code structure for
+* flexibility.)
 *
 * Note: if you change the ring size for BAS_BULKREAD, see also
 * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
@@ -438,9 +440,9 @@ GetBufferFromRing(BufferAccessStrategy strategy)
 strategy->current = 0;

 /*
-* If the slot hasn't been filled yet, tell the caller to allocate
-* a new buffer with the normal allocation strategy. He will then
-* fill this slot by calling AddBufferToRing with the new buffer.
+* If the slot hasn't been filled yet, tell the caller to allocate a new
+* buffer with the normal allocation strategy. He will then fill this
+* slot by calling AddBufferToRing with the new buffer.
 */
 bufnum = strategy->buffers[strategy->current];
 if (bufnum == InvalidBuffer)
@@ -454,9 +456,9 @@ GetBufferFromRing(BufferAccessStrategy strategy)
 *
 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
 * since our own previous usage of the ring element would have left it
-* there, but it might've been decremented by clock sweep since then).
-* A higher usage_count indicates someone else has touched the buffer,
-* so we shouldn't re-use it.
+* there, but it might've been decremented by clock sweep since then). A
+* higher usage_count indicates someone else has touched the buffer, so we
+* shouldn't re-use it.
 */
 buf = &BufferDescriptors[bufnum - 1];
 LockBufHdr(buf);
@@ -492,7 +494,7 @@ AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
 *
 * When a nondefault strategy is used, the buffer manager calls this function
 * when it turns out that the buffer selected by StrategyGetBuffer needs to
-* be written out and doing so would require flushing WAL too. This gives us
+* be written out and doing so would require flushing WAL too. This gives us
 * a chance to choose a different victim.
 *
 * Returns true if buffer manager should ask for a new victim, and false
@@ -507,7 +509,7 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)

 /* Don't muck with behavior of normal buffer-replacement strategy */
 if (!strategy->current_was_in_ring ||
-strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
+strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
 return false;

 /*

src/backend/storage/buffer/localbuf.c

@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.77 2007/05/30 20:11:59 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.78 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -139,7 +139,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 /* Found a usable buffer */
 LocalRefCount[b]++;
 ResourceOwnerRememberBuffer(CurrentResourceOwner,
-BufferDescriptorGetBuffer(bufHdr));
+BufferDescriptorGetBuffer(bufHdr));
 break;
 }
 }
@@ -364,7 +364,7 @@ GetLocalBufferStorage(void)
 if (next_buf_in_block >= num_bufs_in_block)
 {
 /* Need to make a new request to memmgr */
-int num_bufs;
+int num_bufs;

 /* Start with a 16-buffer request; subsequent ones double each time */
 num_bufs = Max(num_bufs_in_block * 2, 16);

src/backend/storage/file/fd.c

@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.140 2007/07/26 15:15:18 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.141 2007/11/15 21:14:38 momjian Exp $
 *
 * NOTES:
 *
@@ -855,16 +855,16 @@ OpenTemporaryFile(bool interXact)

 /*
 * If some temp tablespace(s) have been given to us, try to use the next
-* one. If a given tablespace can't be found, we silently fall back
-* to the database's default tablespace.
+* one. If a given tablespace can't be found, we silently fall back to
+* the database's default tablespace.
 *
 * BUT: if the temp file is slated to outlive the current transaction,
-* force it into the database's default tablespace, so that it will
-* not pose a threat to possible tablespace drop attempts.
+* force it into the database's default tablespace, so that it will not
+* pose a threat to possible tablespace drop attempts.
 */
 if (numTempTableSpaces > 0 && !interXact)
 {
-Oid tblspcOid = GetNextTempTableSpace();
+Oid tblspcOid = GetNextTempTableSpace();

 if (OidIsValid(tblspcOid))
 file = OpenTemporaryFileInTablespace(tblspcOid, false);
@@ -872,7 +872,7 @@ OpenTemporaryFile(bool interXact)

 /*
 * If not, or if tablespace is bad, create in database's default
-* tablespace. MyDatabaseTableSpace should normally be set before we get
+* tablespace. MyDatabaseTableSpace should normally be set before we get
 * here, but just in case it isn't, fall back to pg_default tablespace.
 */
 if (file <= 0)
@@ -941,8 +941,8 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
 if (file <= 0)
 {
 /*
-* We might need to create the tablespace's tempfile directory,
-* if no one has yet done so.
+* We might need to create the tablespace's tempfile directory, if no
+* one has yet done so.
 *
 * Don't check for error from mkdir; it could fail if someone else
 * just did the same thing. If it doesn't work then we'll bomb out on
@@ -967,8 +967,8 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
 void
 FileClose(File file)
 {
-Vfd *vfdP;
-struct stat filestats;
+Vfd *vfdP;
+struct stat filestats;

 Assert(FileIsValid(file));

@@ -1542,13 +1542,14 @@ SetTempTablespaces(Oid *tableSpaces, int numSpaces)
 Assert(numSpaces >= 0);
 tempTableSpaces = tableSpaces;
 numTempTableSpaces = numSpaces;

 /*
-* Select a random starting point in the list. This is to minimize
-* conflicts between backends that are most likely sharing the same
-* list of temp tablespaces. Note that if we create multiple temp
-* files in the same transaction, we'll advance circularly through
-* the list --- this ensures that large temporary sort files are
-* nicely spread across all available tablespaces.
+* Select a random starting point in the list. This is to minimize
+* conflicts between backends that are most likely sharing the same list
+* of temp tablespaces. Note that if we create multiple temp files in the
+* same transaction, we'll advance circularly through the list --- this
+* ensures that large temporary sort files are nicely spread across all
+* available tablespaces.
 */
 if (numSpaces > 1)
 nextTempTableSpace = random() % numSpaces;
@@ -1572,7 +1573,7 @@ TempTablespacesAreSet(void)
 /*
 * GetNextTempTableSpace
 *
-* Select the next temp tablespace to use. A result of InvalidOid means
+* Select the next temp tablespace to use. A result of InvalidOid means
 * to use the current database's default tablespace.
 */
 Oid

src/backend/storage/ipc/ipc.c

@@ -13,7 +13,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/ipc/ipc.c,v 1.98 2007/11/04 17:55:15 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/ipc/ipc.c,v 1.99 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -118,34 +118,31 @@ proc_exit(int code)
 #ifdef PROFILE_PID_DIR
 {
 /*
-* If we are profiling ourself then gprof's mcleanup() is about
-* to write out a profile to ./gmon.out. Since mcleanup() always
-* uses a fixed file name, each backend will overwrite earlier
-* profiles. To fix that, we create a separate subdirectory for
-* each backend (./gprof/pid) and 'cd' to that subdirectory before
-* we exit() - that forces mcleanup() to write each profile into
-* its own directory. We end up with something like:
-* $PGDATA/gprof/8829/gmon.out
-* $PGDATA/gprof/8845/gmon.out
-* ...
+* If we are profiling ourself then gprof's mcleanup() is about to
+* write out a profile to ./gmon.out. Since mcleanup() always uses a
+* fixed file name, each backend will overwrite earlier profiles. To
+* fix that, we create a separate subdirectory for each backend
+* (./gprof/pid) and 'cd' to that subdirectory before we exit() - that
+* forces mcleanup() to write each profile into its own directory. We
+* end up with something like: $PGDATA/gprof/8829/gmon.out
+* $PGDATA/gprof/8845/gmon.out ...
 *
 * To avoid undesirable disk space bloat, autovacuum workers are
 * discriminated against: all their gmon.out files go into the same
 * subdirectory. Without this, an installation that is "just sitting
 * there" nonetheless eats megabytes of disk space every few seconds.
 *
-* Note that we do this here instead of in an on_proc_exit()
-* callback because we want to ensure that this code executes
-* last - we don't want to interfere with any other on_proc_exit()
-* callback.
+* Note that we do this here instead of in an on_proc_exit() callback
+* because we want to ensure that this code executes last - we don't
+* want to interfere with any other on_proc_exit() callback.
 */
-char gprofDirName[32];
+char gprofDirName[32];

 if (IsAutoVacuumWorkerProcess())
 snprintf(gprofDirName, 32, "gprof/avworker");
 else
 snprintf(gprofDirName, 32, "gprof/%d", (int) getpid());


 mkdir("gprof", 0777);
 mkdir(gprofDirName, 0777);
 chdir(gprofDirName);

src/backend/storage/ipc/ipci.c

@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.92 2007/06/08 18:23:52 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.93 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -44,7 +44,7 @@ static bool addin_request_allowed = true;
 * a loadable module.
 *
 * This is only useful if called from the _PG_init hook of a library that
-* is loaded into the postmaster via shared_preload_libraries. Once
+* is loaded into the postmaster via shared_preload_libraries. Once
 * shared memory has been allocated, calls will be ignored. (We could
 * raise an error, but it seems better to make it a no-op, so that
 * libraries containing such calls can be reloaded if needed.)

src/backend/storage/ipc/procarray.c

@@ -23,7 +23,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.36 2007/10/24 20:55:36 alvherre Exp $
+* $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.37 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -230,9 +230,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
 if (TransactionIdIsValid(latestXid))
 {
 /*
-* We must lock ProcArrayLock while clearing proc->xid, so
-* that we do not exit the set of "running" transactions while
-* someone else is taking a snapshot. See discussion in
+* We must lock ProcArrayLock while clearing proc->xid, so that we do
+* not exit the set of "running" transactions while someone else is
+* taking a snapshot. See discussion in
 * src/backend/access/transam/README.
 */
 Assert(TransactionIdIsValid(proc->xid));
@@ -244,7 +244,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
 proc->xmin = InvalidTransactionId;
 /* must be cleared with xid/xmin: */
 proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
-proc->inCommit = false; /* be sure this is cleared in abort */
+proc->inCommit = false; /* be sure this is cleared in abort */

 /* Clear the subtransaction-XID cache too while holding the lock */
 proc->subxids.nxids = 0;
@@ -260,9 +260,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
 else
 {
 /*
-* If we have no XID, we don't need to lock, since we won't
-* affect anyone else's calculation of a snapshot. We might
-* change their estimate of global xmin, but that's OK.
+* If we have no XID, we don't need to lock, since we won't affect
+* anyone else's calculation of a snapshot. We might change their
+* estimate of global xmin, but that's OK.
 */
 Assert(!TransactionIdIsValid(proc->xid));

@@ -270,7 +270,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
 proc->xmin = InvalidTransactionId;
 /* must be cleared with xid/xmin: */
 proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
-proc->inCommit = false; /* be sure this is cleared in abort */
+proc->inCommit = false; /* be sure this is cleared in abort */

 Assert(proc->subxids.nxids == 0);
 Assert(proc->subxids.overflowed == false);
@@ -291,8 +291,8 @@ ProcArrayClearTransaction(PGPROC *proc)
 {
 /*
 * We can skip locking ProcArrayLock here, because this action does not
-* actually change anyone's view of the set of running XIDs: our entry
-* is duplicate with the gxact that has already been inserted into the
+* actually change anyone's view of the set of running XIDs: our entry is
+* duplicate with the gxact that has already been inserted into the
 * ProcArray.
 */
 proc->xid = InvalidTransactionId;
@@ -343,9 +343,9 @@ TransactionIdIsInProgress(TransactionId xid)

 /*
 * Don't bother checking a transaction older than RecentXmin; it could not
-* possibly still be running. (Note: in particular, this guarantees
-* that we reject InvalidTransactionId, FrozenTransactionId, etc as
-* not running.)
+* possibly still be running. (Note: in particular, this guarantees that
+* we reject InvalidTransactionId, FrozenTransactionId, etc as not
+* running.)
 */
 if (TransactionIdPrecedes(xid, RecentXmin))
 {
@@ -364,8 +364,8 @@ TransactionIdIsInProgress(TransactionId xid)
 }

 /*
-* If not first time through, get workspace to remember main XIDs in.
-* We malloc it permanently to avoid repeated palloc/pfree overhead.
+* If not first time through, get workspace to remember main XIDs in. We
+* malloc it permanently to avoid repeated palloc/pfree overhead.
 */
 if (xids == NULL)
 {
@@ -393,7 +393,7 @@ TransactionIdIsInProgress(TransactionId xid)
 /* No shortcuts, gotta grovel through the array */
 for (i = 0; i < arrayP->numProcs; i++)
 {
-volatile PGPROC *proc = arrayP->procs[i];
+volatile PGPROC *proc = arrayP->procs[i];
 TransactionId pxid;

 /* Ignore my own proc --- dealt with it above */
@@ -477,8 +477,8 @@ TransactionIdIsInProgress(TransactionId xid)

 /*
 * It isn't aborted, so check whether the transaction tree it belongs to
-* is still running (or, more precisely, whether it was running when
-* we held ProcArrayLock).
+* is still running (or, more precisely, whether it was running when we
+* held ProcArrayLock).
 */
 topxid = SubTransGetTopmostTransaction(xid);
 Assert(TransactionIdIsValid(topxid));
@@ -519,7 +519,7 @@ TransactionIdIsActive(TransactionId xid)

 for (i = 0; i < arrayP->numProcs; i++)
 {
-volatile PGPROC *proc = arrayP->procs[i];
+volatile PGPROC *proc = arrayP->procs[i];

 /* Fetch xid just once - see GetNewTransactionId */
 TransactionId pxid = proc->xid;
@@ -578,10 +578,10 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
 LWLockAcquire(ProcArrayLock, LW_SHARED);

 /*
-* We initialize the MIN() calculation with latestCompletedXid + 1.
-* This is a lower bound for the XIDs that might appear in the ProcArray
-* later, and so protects us against overestimating the result due to
-* future additions.
+* We initialize the MIN() calculation with latestCompletedXid + 1. This
+* is a lower bound for the XIDs that might appear in the ProcArray later,
+* and so protects us against overestimating the result due to future
+* additions.
 */
 result = ShmemVariableCache->latestCompletedXid;
 Assert(TransactionIdIsNormal(result));
@@ -589,7 +589,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (ignoreVacuum && (proc->vacuumFlags & PROC_IN_VACUUM))
 continue;
@@ -608,8 +608,8 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
 * Also consider the transaction's Xmin, if set.
 *
 * We must check both Xid and Xmin because a transaction might
-* have an Xmin but not (yet) an Xid; conversely, if it has
-* an Xid, that could determine some not-yet-set Xmin.
+* have an Xmin but not (yet) an Xid; conversely, if it has an
+* Xid, that could determine some not-yet-set Xmin.
 */
 xid = proc->xmin; /* Fetch just once */
 if (TransactionIdIsNormal(xid) &&
@@ -718,13 +718,13 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
 globalxmin = xmin = xmax;

 /*
-* Spin over procArray checking xid, xmin, and subxids. The goal is
-* to gather all active xids, find the lowest xmin, and try to record
+* Spin over procArray checking xid, xmin, and subxids. The goal is to
+* gather all active xids, find the lowest xmin, and try to record
 * subxids.
 */
 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];
 TransactionId xid;

 /* Ignore procs running LAZY VACUUM */
@@ -742,7 +742,7 @@ GetSnapshotData(Snapshot snapshot, bool serializable)

 /*
 * If the transaction has been assigned an xid < xmax we add it to the
-* snapshot, and update xmin if necessary. There's no need to store
+* snapshot, and update xmin if necessary. There's no need to store
 * XIDs >= xmax, since we'll treat them as running anyway. We don't
 * bother to examine their subxids either.
 *
@@ -841,8 +841,8 @@ GetTransactionsInCommit(TransactionId **xids_p)
 {
 ProcArrayStruct *arrayP = procArray;
 TransactionId *xids;
-int nxids;
-int index;
+int nxids;
+int index;

 xids = (TransactionId *) palloc(arrayP->maxProcs * sizeof(TransactionId));
 nxids = 0;
@@ -851,7 +851,8 @@ GetTransactionsInCommit(TransactionId **xids_p)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 /* Fetch xid just once - see GetNewTransactionId */
 TransactionId pxid = proc->xid;

@@ -877,21 +878,22 @@ GetTransactionsInCommit(TransactionId **xids_p)
 bool
 HaveTransactionsInCommit(TransactionId *xids, int nxids)
 {
-bool result = false;
+bool result = false;
 ProcArrayStruct *arrayP = procArray;
-int index;
+int index;

 LWLockAcquire(ProcArrayLock, LW_SHARED);

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 /* Fetch xid just once - see GetNewTransactionId */
 TransactionId pxid = proc->xid;

 if (proc->inCommit && TransactionIdIsValid(pxid))
 {
-int i;
+int i;

 for (i = 0; i < nxids; i++)
 {
@@ -956,7 +958,7 @@ BackendPidGetProc(int pid)
 * Only main transaction Ids are considered. This function is mainly
 * useful for determining what backend owns a lock.
 *
-* Beware that not every xact has an XID assigned. However, as long as you
+* Beware that not every xact has an XID assigned. However, as long as you
 * only call this using an XID found on disk, you're safe.
 */
 int
@@ -973,7 +975,7 @@ BackendXidGetPid(TransactionId xid)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc->xid == xid)
 {
@@ -1003,8 +1005,8 @@ IsBackendPid(int pid)
 * The array is palloc'd and is terminated with an invalid VXID.
 *
 * If limitXmin is not InvalidTransactionId, we skip any backends
-* with xmin >= limitXmin. If allDbs is false, we skip backends attached
-* to other databases. Also, our own process is always skipped.
+* with xmin >= limitXmin. If allDbs is false, we skip backends attached
+* to other databases. Also, our own process is always skipped.
 */
 VirtualTransactionId *
 GetCurrentVirtualXIDs(TransactionId limitXmin, bool allDbs)
@@ -1022,7 +1024,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool allDbs)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc == MyProc)
 continue;
@@ -1080,7 +1082,7 @@ CountActiveBackends(void)
 */
 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc == MyProc)
 continue; /* do not count myself */
@@ -1110,7 +1112,7 @@ CountDBBackends(Oid databaseid)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc->pid == 0)
 continue; /* do not count prepared xacts */
@@ -1137,7 +1139,7 @@ CountUserBackends(Oid roleid)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc->pid == 0)
 continue; /* do not count prepared xacts */
@@ -1189,7 +1191,7 @@ CheckOtherDBBackends(Oid databaseId)

 for (index = 0; index < arrayP->numProcs; index++)
 {
-volatile PGPROC *proc = arrayP->procs[index];
+volatile PGPROC *proc = arrayP->procs[index];

 if (proc->databaseId != databaseId)
 continue;
@@ -1201,16 +1203,17 @@ CheckOtherDBBackends(Oid databaseId)
 if (proc->vacuumFlags & PROC_IS_AUTOVACUUM)
 {
 /* an autovacuum --- send it SIGTERM before sleeping */
-int autopid = proc->pid;
+int autopid = proc->pid;

 /*
-* It's a bit awkward to release ProcArrayLock within the loop,
-* but we'd probably better do so before issuing kill(). We
-* have no idea what might block kill() inside the kernel...
+* It's a bit awkward to release ProcArrayLock within the
+* loop, but we'd probably better do so before issuing kill().
+* We have no idea what might block kill() inside the
+* kernel...
 */
 LWLockRelease(ProcArrayLock);

-(void) kill(autopid, SIGTERM); /* ignore any error */
+(void) kill(autopid, SIGTERM); /* ignore any error */

 break;
 }
@@ -1225,14 +1228,14 @@ CheckOtherDBBackends(Oid databaseId)
 if (!found)
 {
 LWLockRelease(ProcArrayLock);
-return false; /* no conflicting backends, so done */
+return false; /* no conflicting backends, so done */
 }

 /* else sleep and try again */
-pg_usleep(100 * 1000L); /* 100ms */
+pg_usleep(100 * 1000L); /* 100ms */
 }

-return true; /* timed out, still conflicts */
+return true; /* timed out, still conflicts */
 }

src/backend/storage/ipc/sinvaladt.c

@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.64 2007/09/05 18:10:47 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.65 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -394,7 +394,8 @@ GetNextLocalTransactionId(void)
 LocalTransactionId result;

 /* loop to avoid returning InvalidLocalTransactionId at wraparound */
-do {
+do
+{
 result = nextLocalTransactionId++;
 } while (!LocalTransactionIdIsValid(result));

src/backend/storage/large_object/inv_api.c

@@ -24,7 +24,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.125 2007/06/12 19:46:24 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.126 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -337,10 +337,10 @@ inv_getsize(LargeObjectDesc *obj_desc)
 bool pfreeit;

 found = true;
-if (HeapTupleHasNulls(tuple)) /* paranoia */
+if (HeapTupleHasNulls(tuple)) /* paranoia */
 elog(ERROR, "null field found in pg_largeobject");
 data = (Form_pg_largeobject) GETSTRUCT(tuple);
-datafield = &(data->data); /* see note at top of file */
+datafield = &(data->data); /* see note at top of file */
 pfreeit = false;
 if (VARATT_IS_EXTENDED(datafield))
 {
@@ -443,7 +443,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
 bytea *datafield;
 bool pfreeit;

-if (HeapTupleHasNulls(tuple)) /* paranoia */
+if (HeapTupleHasNulls(tuple)) /* paranoia */
 elog(ERROR, "null field found in pg_largeobject");
 data = (Form_pg_largeobject) GETSTRUCT(tuple);

@@ -468,7 +468,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
 off = (int) (obj_desc->offset - pageoff);
 Assert(off >= 0 && off < LOBLKSIZE);

-datafield = &(data->data); /* see note at top of file */
+datafield = &(data->data); /* see note at top of file */
 pfreeit = false;
 if (VARATT_IS_EXTENDED(datafield))
 {
@@ -569,7 +569,7 @@ inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
 {
 if ((oldtuple = index_getnext(sd, ForwardScanDirection)) != NULL)
 {
-if (HeapTupleHasNulls(oldtuple)) /* paranoia */
+if (HeapTupleHasNulls(oldtuple)) /* paranoia */
 elog(ERROR, "null field found in pg_largeobject");
 olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
 Assert(olddata->pageno >= pageno);
@@ -700,16 +700,16 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 {
 int32 pageno = (int32) (len / LOBLKSIZE);
 int off;
-ScanKeyData skey[2];
+ScanKeyData skey[2];
 IndexScanDesc sd;
 HeapTuple oldtuple;
-Form_pg_largeobject olddata;
+Form_pg_largeobject olddata;
 struct
 {
 bytea hdr;
 char data[LOBLKSIZE];
 } workbuf;
-char *workb = VARDATA(&workbuf.hdr);
+char *workb = VARDATA(&workbuf.hdr);
 HeapTuple newtup;
 Datum values[Natts_pg_largeobject];
 char nulls[Natts_pg_largeobject];
@@ -743,30 +743,30 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 obj_desc->snapshot, 2, skey);

 /*
-* If possible, get the page the truncation point is in.
-* The truncation point may be beyond the end of the LO or
-* in a hole.
+* If possible, get the page the truncation point is in. The truncation
+* point may be beyond the end of the LO or in a hole.
 */
 olddata = NULL;
 if ((oldtuple = index_getnext(sd, ForwardScanDirection)) != NULL)
 {
-if (HeapTupleHasNulls(oldtuple)) /* paranoia */
+if (HeapTupleHasNulls(oldtuple)) /* paranoia */
 elog(ERROR, "null field found in pg_largeobject");
 olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
 Assert(olddata->pageno >= pageno);
 }

 /*
-* If we found the page of the truncation point we need to
-* truncate the data in it. Otherwise if we're in a hole,
-* we need to create a page to mark the end of data.
+* If we found the page of the truncation point we need to truncate the
+* data in it. Otherwise if we're in a hole, we need to create a page to
+* mark the end of data.
 */
 if (olddata != NULL && olddata->pageno == pageno)
 {
 /* First, load old data into workbuf */
-bytea *datafield = &(olddata->data); /* see note at top of file */
-bool pfreeit = false;
-int pagelen;
+bytea *datafield = &(olddata->data); /* see note at top of
+* file */
+bool pfreeit = false;
+int pagelen;

 if (VARATT_IS_EXTENDED(datafield))
 {
@@ -778,14 +778,14 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 Assert(pagelen <= LOBLKSIZE);
 memcpy(workb, VARDATA(datafield), pagelen);
 if (pfreeit)
-pfree(datafield);
+pfree(datafield);

 /*
 * Fill any hole
 */
 off = len % LOBLKSIZE;
 if (off > pagelen)
-MemSet(workb + pagelen, 0, off - pagelen);
+MemSet(workb + pagelen, 0, off - pagelen);

 /* compute length of new page */
 SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);
@@ -807,16 +807,15 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 else
 {
 /*
-* If the first page we found was after the truncation
-* point, we're in a hole that we'll fill, but we need to
-* delete the later page.
+* If the first page we found was after the truncation point, we're in
+* a hole that we'll fill, but we need to delete the later page.
 */
 if (olddata != NULL && olddata->pageno > pageno)
 simple_heap_delete(lo_heap_r, &oldtuple->t_self);

 /*
 * Write a brand new page.
 *
-*
 * Fill the hole up to the truncation point
 */
 off = len % LOBLKSIZE;
@@ -826,7 +825,7 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 /* compute length of new page */
 SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

-/*
+/*
 * Form and insert new tuple
 */
 memset(values, 0, sizeof(values));
@@ -851,11 +850,10 @@ inv_truncate(LargeObjectDesc *obj_desc, int len)
 index_endscan(sd);

 CatalogCloseIndexes(indstate);

-
 /*
 * Advance command counter so that tuple updates will be seen by later
 * large-object operations in this transaction.
 */
 CommandCounterIncrement();
 }

src/backend/storage/lmgr/deadlock.c

@@ -12,7 +12,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.49 2007/10/26 20:45:10 alvherre Exp $
+* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.50 2007/11/15 21:14:38 momjian Exp $
 *
 * Interface:
 *
@@ -110,7 +110,7 @@ static DEADLOCK_INFO *deadlockDetails;
 static int nDeadlockDetails;

 /* PGPROC pointer of any blocking autovacuum worker found */
-static PGPROC *blocking_autovacuum_proc = NULL;
+static PGPROC *blocking_autovacuum_proc = NULL;


 /*
@@ -275,7 +275,7 @@ DeadLockCheck(PGPROC *proc)
 PGPROC *
 GetBlockingAutoVacuumPgproc(void)
 {
-PGPROC *ptr;
+PGPROC *ptr;

 ptr = blocking_autovacuum_proc;
 blocking_autovacuum_proc = NULL;
@@ -524,7 +524,7 @@ FindLockCycleRecurse(PGPROC *checkProc,
 /*
 * Look for a blocking autovacuum. There can be more than
 * one in the deadlock cycle, in which case we just pick a
-* random one. We stash the autovacuum worker's PGPROC so
+* random one. We stash the autovacuum worker's PGPROC so
 * that the caller can send a cancel signal to it, if
 * appropriate.
 *
@@ -532,10 +532,10 @@ FindLockCycleRecurse(PGPROC *checkProc,
 * OK only for checking the PROC_IS_AUTOVACUUM flag,
 * because that flag is set at process start and never
 * reset; there is logic elsewhere to avoid cancelling an
-* autovacuum that is working for preventing Xid wraparound
-* problems (which needs to read a different vacuumFlag
-* bit), but we don't do that here to avoid grabbing
-* ProcArrayLock.
+* autovacuum that is working for preventing Xid
+* wraparound problems (which needs to read a different
+* vacuumFlag bit), but we don't do that here to avoid
+* grabbing ProcArrayLock.
 */
 if (proc->vacuumFlags & PROC_IS_AUTOVACUUM)
 blocking_autovacuum_proc = proc;

src/backend/storage/lmgr/lmgr.c

@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.93 2007/09/05 18:10:47 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.94 2007/11/15 21:14:38 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -518,7 +518,7 @@ ConditionalXactLockTableWait(TransactionId xid)


 /*
-* VirtualXactLockTableInsert
+* VirtualXactLockTableInsert
 *
 * Insert a lock showing that the given virtual transaction ID is running ---
 * this is done at main transaction start when its VXID is assigned.
@@ -537,7 +537,7 @@ VirtualXactLockTableInsert(VirtualTransactionId vxid)
 }

 /*
-* VirtualXactLockTableWait
+* VirtualXactLockTableWait
 *
 * Waits until the lock on the given VXID is released, which shows that
 * the top-level transaction owning the VXID has ended.
@@ -557,7 +557,7 @@ VirtualXactLockTableWait(VirtualTransactionId vxid)
 }

 /*
-* ConditionalVirtualXactLockTableWait
+* ConditionalVirtualXactLockTableWait
 *
 * As above, but only lock if we can get the lock without blocking.
 * Returns TRUE if the lock was acquired.

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.178 2007/09/05 18:10:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.179 2007/11/15 21:14:38 momjian Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
@@ -581,7 +581,7 @@ LockAcquire(const LOCKTAG *locktag,
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
errhint("You might need to increase max_locks_per_transaction.")));
}
locallock->lock = lock;

@@ -647,7 +647,7 @@ LockAcquire(const LOCKTAG *locktag,
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
errhint("You might need to increase max_locks_per_transaction.")));
}
locallock->proclock = proclock;

@@ -1716,9 +1716,9 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
elog(ERROR, "unrecognized lock mode: %d", lockmode);

/*
* Allocate memory to store results, and fill with InvalidVXID. We
* only need enough space for MaxBackends + a terminator, since
* prepared xacts don't count.
* Allocate memory to store results, and fill with InvalidVXID. We only
* need enough space for MaxBackends + a terminator, since prepared xacts
* don't count.
*/
vxids = (VirtualTransactionId *)
palloc0(sizeof(VirtualTransactionId) * (MaxBackends + 1));
@@ -1771,8 +1771,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)

/*
* If we see an invalid VXID, then either the xact has already
* committed (or aborted), or it's a prepared xact. In
* either case we may ignore it.
* committed (or aborted), or it's a prepared xact. In either
* case we may ignore it.
*/
if (VirtualTransactionIdIsValid(vxid))
vxids[count++] = vxid;
@@ -2150,11 +2150,11 @@ GetLockStatusData(void)
}

/*
* And release locks. We do this in reverse order for two reasons:
* (1) Anyone else who needs more than one of the locks will be trying
* to lock them in increasing order; we don't want to release the other
* process until it can get all the locks it needs.
* (2) This avoids O(N^2) behavior inside LWLockRelease.
* And release locks. We do this in reverse order for two reasons: (1)
* Anyone else who needs more than one of the locks will be trying to lock
* them in increasing order; we don't want to release the other process
* until it can get all the locks it needs. (2) This avoids O(N^2)
* behavior inside LWLockRelease.
*/
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
LWLockRelease(FirstLockMgrLock + i);
@@ -2308,7 +2308,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
errhint("You might need to increase max_locks_per_transaction.")));
}

/*
@@ -2373,7 +2373,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
errhint("You might need to increase max_locks_per_transaction.")));
}

/*

@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.48 2007/01/05 22:19:38 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.49 2007/11/15 21:14:38 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -175,8 +175,8 @@ NumLWLocks(void)

/*
* Add any requested by loadable modules; for backwards-compatibility
* reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even
* if there are no explicit requests.
* reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even if
* there are no explicit requests.
*/
lock_addin_request_allowed = false;
numLocks += Max(lock_addin_request, NUM_USER_DEFINED_LWLOCKS);
@@ -191,7 +191,7 @@ NumLWLocks(void)
* a loadable module.
*
* This is only useful if called from the _PG_init hook of a library that
* is loaded into the postmaster via shared_preload_libraries. Once
* is loaded into the postmaster via shared_preload_libraries. Once
* shared memory has been allocated, calls will be ignored. (We could
* raise an error, but it seems better to make it a no-op, so that
* libraries containing such calls can be reloaded if needed.)

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.196 2007/10/26 20:45:10 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.197 2007/11/15 21:14:38 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -207,7 +207,7 @@ InitProcGlobal(void)
MemSet(AuxiliaryProcs, 0, NUM_AUXILIARY_PROCS * sizeof(PGPROC));
for (i = 0; i < NUM_AUXILIARY_PROCS; i++)
{
AuxiliaryProcs[i].pid = 0; /* marks auxiliary proc as not in use */
AuxiliaryProcs[i].pid = 0; /* marks auxiliary proc as not in use */
PGSemaphoreCreate(&(AuxiliaryProcs[i].sem));
}

@@ -362,7 +362,7 @@ InitProcessPhase2(void)
*
* Auxiliary processes are presently not expected to wait for real (lockmgr)
* locks, so we need not set up the deadlock checker. They are never added
* to the ProcArray or the sinval messaging mechanism, either. They also
* to the ProcArray or the sinval messaging mechanism, either. They also
* don't get a VXID assigned, since this is only useful when we actually
* hold lockmgr locks.
*/
@@ -734,7 +734,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
bool allow_autovacuum_cancel = true;
bool allow_autovacuum_cancel = true;
int myWaitStatus;
PGPROC *proc;
int i;
@@ -889,18 +889,18 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)

/*
* waitStatus could change from STATUS_WAITING to something else
* asynchronously. Read it just once per loop to prevent surprising
* asynchronously. Read it just once per loop to prevent surprising
* behavior (such as missing log messages).
*/
myWaitStatus = MyProc->waitStatus;

/*
* If we are not deadlocked, but are waiting on an autovacuum-induced
* task, send a signal to interrupt it.
* task, send a signal to interrupt it.
*/
if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
{
PGPROC *autovac = GetBlockingAutoVacuumPgproc();
PGPROC *autovac = GetBlockingAutoVacuumPgproc();

LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

@@ -912,7 +912,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
(autovac->vacuumFlags & PROC_IS_AUTOVACUUM) &&
!(autovac->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND))
{
int pid = autovac->pid;
int pid = autovac->pid;

elog(DEBUG2, "sending cancel to blocking autovacuum pid = %d",
pid);
@@ -960,49 +960,50 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
if (deadlock_state == DS_SOFT_DEADLOCK)
ereport(LOG,
(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
MyProcPid, modename, buf.data, msecs, usecs)));
else if (deadlock_state == DS_HARD_DEADLOCK)
{
/*
* This message is a bit redundant with the error that will
* be reported subsequently, but in some cases the error
* report might not make it to the log (eg, if it's caught by
* an exception handler), and we want to ensure all long-wait
* This message is a bit redundant with the error that will be
* reported subsequently, but in some cases the error report
* might not make it to the log (eg, if it's caught by an
* exception handler), and we want to ensure all long-wait
* events get logged.
*/
ereport(LOG,
(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
MyProcPid, modename, buf.data, msecs, usecs)));
}

if (myWaitStatus == STATUS_WAITING)
ereport(LOG,
(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
MyProcPid, modename, buf.data, msecs, usecs)));
else if (myWaitStatus == STATUS_OK)
ereport(LOG,
(errmsg("process %d acquired %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
(errmsg("process %d acquired %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
else
{
Assert(myWaitStatus == STATUS_ERROR);

/*
* Currently, the deadlock checker always kicks its own
* process, which means that we'll only see STATUS_ERROR
* when deadlock_state == DS_HARD_DEADLOCK, and there's no
* need to print redundant messages. But for completeness
* and future-proofing, print a message if it looks like
* someone else kicked us off the lock.
* process, which means that we'll only see STATUS_ERROR when
* deadlock_state == DS_HARD_DEADLOCK, and there's no need to
* print redundant messages. But for completeness and
* future-proofing, print a message if it looks like someone
* else kicked us off the lock.
*/
if (deadlock_state != DS_HARD_DEADLOCK)
ereport(LOG,
(errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data, msecs, usecs)));
MyProcPid, modename, buf.data, msecs, usecs)));
}

/*
* At this point we might still need to wait for the lock.
* Reset state so we don't print the above messages again.
* At this point we might still need to wait for the lock. Reset
* state so we don't print the above messages again.
*/
deadlock_state = DS_NO_DEADLOCK;

@@ -1237,8 +1238,8 @@ CheckDeadLock(void)
/*
* Unlock my semaphore so that the interrupted ProcSleep() call can
* print the log message (we daren't do it here because we are inside
* a signal handler). It will then sleep again until someone
* releases the lock.
* a signal handler). It will then sleep again until someone releases
* the lock.
*
* If blocked by autovacuum, this wakeup will enable ProcSleep to send
* the cancelling signal to the autovacuum worker.
@@ -1247,11 +1248,11 @@ CheckDeadLock(void)
}

/*
* And release locks. We do this in reverse order for two reasons:
* (1) Anyone else who needs more than one of the locks will be trying
* to lock them in increasing order; we don't want to release the other
* process until it can get all the locks it needs.
* (2) This avoids O(N^2) behavior inside LWLockRelease.
* And release locks. We do this in reverse order for two reasons: (1)
* Anyone else who needs more than one of the locks will be trying to lock
* them in increasing order; we don't want to release the other process
* until it can get all the locks it needs. (2) This avoids O(N^2)
* behavior inside LWLockRelease.
*/
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.75 2007/09/21 21:25:42 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.76 2007/11/15 21:14:38 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -177,9 +177,9 @@ PageAddItem(Page page,
if (PageHasFreeLinePointers(phdr))
{
/*
* Look for "recyclable" (unused) ItemId. We check for no
* storage as well, just to be paranoid --- unused items
* should never have storage.
* Look for "recyclable" (unused) ItemId. We check for no storage
* as well, just to be paranoid --- unused items should never have
* storage.
*/
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
{
@@ -510,12 +510,13 @@ PageGetExactFreeSpace(Page page)
Size
PageGetHeapFreeSpace(Page page)
{
Size space;
Size space;

space = PageGetFreeSpace(page);
if (space > 0)
{
OffsetNumber offnum, nline;
OffsetNumber offnum,
nline;

/*
* Are there already MaxHeapTuplesPerPage line pointers in the page?
@@ -531,7 +532,7 @@ PageGetHeapFreeSpace(Page page)
*/
for (offnum = FirstOffsetNumber; offnum <= nline; offnum++)
{
ItemId lp = PageGetItemId(page, offnum);
ItemId lp = PageGetItemId(page, offnum);

if (!ItemIdIsUsed(lp))
break;
@@ -540,8 +541,8 @@ PageGetHeapFreeSpace(Page page)
if (offnum > nline)
{
/*
* The hint is wrong, but we can't clear it here since
* we don't have the ability to mark the page dirty.
* The hint is wrong, but we can't clear it here since we
* don't have the ability to mark the page dirty.
*/
space = 0;
}

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.130 2007/11/15 20:36:40 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.131 2007/11/15 21:14:38 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -34,7 +34,7 @@
/* special values for the segno arg to RememberFsyncRequest */
#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
#define UNLINK_RELATION_REQUEST (InvalidBlockNumber-2)
#define UNLINK_RELATION_REQUEST (InvalidBlockNumber-2)

/*
* On Windows, we have to interpret EACCES as possibly meaning the same as
@@ -44,9 +44,9 @@
* a pending fsync request getting revoked ... see mdsync).
*/
#ifndef WIN32
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT)
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT)
#else
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES)
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES)
#endif

/*
@@ -68,7 +68,7 @@
* not needed because of an mdtruncate() operation. The reason for leaving
* them present at size zero, rather than unlinking them, is that other
* backends and/or the bgwriter might be holding open file references to
* such segments. If the relation expands again after mdtruncate(), such
* such segments. If the relation expands again after mdtruncate(), such
* that a deactivated segment becomes active again, it is important that
* such file references still be valid --- else data might get written
* out to an unlinked old copy of a segment file that will eventually
@@ -125,7 +125,7 @@ typedef struct
{
RelFileNode rnode; /* the targeted relation */
BlockNumber segno; /* which segment */
} PendingOperationTag;
} PendingOperationTag;

typedef uint16 CycleCtr; /* can be any convenient integer size */

@@ -139,8 +139,8 @@ typedef struct
typedef struct
{
RelFileNode rnode; /* the dead relation to delete */
CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */
} PendingUnlinkEntry;
CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */
} PendingUnlinkEntry;

static HTAB *pendingOpsTable = NULL;
static List *pendingUnlinks = NIL;
@@ -154,7 +154,7 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */
EXTENSION_FAIL, /* ereport if segment not present */
EXTENSION_RETURN_NULL, /* return NULL if not present */
EXTENSION_CREATE /* create new segments as needed */
} ExtensionBehavior;
} ExtensionBehavior;

/* local routines */
static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
@@ -167,7 +167,7 @@ static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
int oflags);
#endif
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
bool isTemp, ExtensionBehavior behavior);
bool isTemp, ExtensionBehavior behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);


@@ -276,13 +276,13 @@ mdcreate(SMgrRelation reln, bool isRedo)
* number from being reused. The scenario this protects us from is:
* 1. We delete a relation (and commit, and actually remove its file).
* 2. We create a new relation, which by chance gets the same relfilenode as
* the just-deleted one (OIDs must've wrapped around for that to happen).
* the just-deleted one (OIDs must've wrapped around for that to happen).
* 3. We crash before another checkpoint occurs.
* During replay, we would delete the file and then recreate it, which is fine
* if the contents of the file were repopulated by subsequent WAL entries.
* But if we didn't WAL-log insertions, but instead relied on fsyncing the
* file after populating it (as for instance CLUSTER and CREATE INDEX do),
* the contents of the file would be lost forever. By leaving the empty file
* the contents of the file would be lost forever. By leaving the empty file
* until after the next checkpoint, we prevent reassignment of the relfilenode
* number until it's safe, because relfilenode assignment skips over any
* existing file.
@@ -299,11 +299,11 @@ void
mdunlink(RelFileNode rnode, bool isRedo)
{
char *path;
int ret;
int ret;

/*
* We have to clean out any pending fsync requests for the doomed relation,
* else the next mdsync() will fail.
* We have to clean out any pending fsync requests for the doomed
* relation, else the next mdsync() will fail.
*/
ForgetRelationFsyncRequests(rnode);

@@ -336,8 +336,8 @@ mdunlink(RelFileNode rnode, bool isRedo)
BlockNumber segno;

/*
* Note that because we loop until getting ENOENT, we will
* correctly remove all inactive segments as well as active ones.
* Note that because we loop until getting ENOENT, we will correctly
* remove all inactive segments as well as active ones.
*/
for (segno = 1;; segno++)
{
@@ -389,9 +389,9 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
#endif

/*
* If a relation manages to grow to 2^32-1 blocks, refuse to extend it
* any more --- we mustn't create a block whose number
* actually is InvalidBlockNumber.
* If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
* more --- we mustn't create a block whose number actually is
* InvalidBlockNumber.
*/
if (blocknum == InvalidBlockNumber)
ereport(ERROR,
@@ -414,7 +414,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
/*
* Note: because caller usually obtained blocknum by calling mdnblocks,
* which did a seek(SEEK_END), this seek is often redundant and will be
* optimized away by fd.c. It's not redundant, however, if there is a
* optimized away by fd.c. It's not redundant, however, if there is a
* partial page at the end of the file. In that case we want to try to
* overwrite the partial page with a full page. It's also not redundant
* if bufmgr.c had to dump another buffer of the same file to make room
@@ -588,16 +588,17 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read block %u of relation %u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not read block %u of relation %u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));

/*
* Short read: we are at or past EOF, or we read a partial block at
* EOF. Normally this is an error; upper levels should never try to
* read a nonexistent block. However, if zero_damaged_pages is ON
* or we are InRecovery, we should instead return zeroes without
* read a nonexistent block. However, if zero_damaged_pages is ON or
* we are InRecovery, we should instead return zeroes without
* complaining. This allows, for example, the case of trying to
* update a block that was later truncated away.
*/
@@ -657,11 +658,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (nbytes < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write block %u of relation %u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not write block %u of relation %u/%u/%u: %m",
blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
/* short write: complain appropriately */
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
@@ -703,7 +704,7 @@ mdnblocks(SMgrRelation reln)
* NOTE: this assumption could only be wrong if another backend has
* truncated the relation. We rely on higher code levels to handle that
* scenario by closing and re-opening the md fd, which is handled via
* relcache flush. (Since the bgwriter doesn't participate in relcache
* relcache flush. (Since the bgwriter doesn't participate in relcache
* flush, it could have segment chain entries for inactive segments;
* that's OK because the bgwriter never needs to compute relation size.)
*/
@@ -738,11 +739,11 @@ mdnblocks(SMgrRelation reln)
if (v->mdfd_chain == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open segment %u of relation %u/%u/%u: %m",
segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not open segment %u of relation %u/%u/%u: %m",
segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
}

v = v->mdfd_chain;
@@ -766,8 +767,8 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
#endif

/*
* NOTE: mdnblocks makes sure we have opened all active segments, so
* that truncation loop will get them all!
* NOTE: mdnblocks makes sure we have opened all active segments, so that
* truncation loop will get them all!
*/
curnblk = mdnblocks(reln);
if (nblocks > curnblk)
@@ -796,9 +797,9 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
if (priorblocks > nblocks)
{
/*
* This segment is no longer active (and has already been
* unlinked from the mdfd_chain). We truncate the file, but do
* not delete it, for reasons explained in the header comments.
* This segment is no longer active (and has already been unlinked
* from the mdfd_chain). We truncate the file, but do not delete
* it, for reasons explained in the header comments.
*/
if (FileTruncate(v->mdfd_vfd, 0) < 0)
ereport(ERROR,
@@ -876,8 +877,8 @@ mdimmedsync(SMgrRelation reln)
BlockNumber curnblk;

/*
* NOTE: mdnblocks makes sure we have opened all active segments, so
* that fsync loop will get them all!
* NOTE: mdnblocks makes sure we have opened all active segments, so that
* fsync loop will get them all!
*/
curnblk = mdnblocks(reln);

@@ -889,11 +890,11 @@ mdimmedsync(SMgrRelation reln)
if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
v->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
v->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
v = v->mdfd_chain;
}
#else
@@ -929,12 +930,12 @@ mdsync(void)

/*
* If we are in the bgwriter, the sync had better include all fsync
* requests that were queued by backends up to this point. The tightest
* requests that were queued by backends up to this point. The tightest
* race condition that could occur is that a buffer that must be written
* and fsync'd for the checkpoint could have been dumped by a backend
* just before it was visited by BufferSync(). We know the backend will
* have queued an fsync request before clearing the buffer's dirtybit,
* so we are safe as long as we do an Absorb after completing BufferSync().
* and fsync'd for the checkpoint could have been dumped by a backend just
* before it was visited by BufferSync(). We know the backend will have
* queued an fsync request before clearing the buffer's dirtybit, so we
* are safe as long as we do an Absorb after completing BufferSync().
*/
AbsorbFsyncRequests();

@@ -946,21 +947,21 @@ mdsync(void)
* ones: new ones will have cycle_ctr equal to the incremented value of
* mdsync_cycle_ctr.
*
* In normal circumstances, all entries present in the table at this
* point will have cycle_ctr exactly equal to the current (about to be old)
* In normal circumstances, all entries present in the table at this point
* will have cycle_ctr exactly equal to the current (about to be old)
* value of mdsync_cycle_ctr. However, if we fail partway through the
* fsync'ing loop, then older values of cycle_ctr might remain when we
* come back here to try again. Repeated checkpoint failures would
* eventually wrap the counter around to the point where an old entry
* might appear new, causing us to skip it, possibly allowing a checkpoint
* to succeed that should not have. To forestall wraparound, any time
* the previous mdsync() failed to complete, run through the table and
* to succeed that should not have. To forestall wraparound, any time the
* previous mdsync() failed to complete, run through the table and
* forcibly set cycle_ctr = mdsync_cycle_ctr.
*
* Think not to merge this loop with the main loop, as the problem is
* exactly that that loop may fail before having visited all the entries.
* From a performance point of view it doesn't matter anyway, as this
* path will never be taken in a system that's functioning normally.
* From a performance point of view it doesn't matter anyway, as this path
* will never be taken in a system that's functioning normally.
*/
if (mdsync_in_progress)
{
@@ -994,10 +995,10 @@ mdsync(void)
Assert((CycleCtr) (entry->cycle_ctr + 1) == mdsync_cycle_ctr);

/*
* If fsync is off then we don't have to bother opening the file
* at all. (We delay checking until this point so that changing
* fsync on the fly behaves sensibly.) Also, if the entry is
* marked canceled, fall through to delete it.
* If fsync is off then we don't have to bother opening the file at
* all. (We delay checking until this point so that changing fsync on
* the fly behaves sensibly.) Also, if the entry is marked canceled,
* fall through to delete it.
*/
if (enableFsync && !entry->canceled)
{
@@ -1018,16 +1019,16 @@ mdsync(void)

/*
* The fsync table could contain requests to fsync segments that
* have been deleted (unlinked) by the time we get to them.
* Rather than just hoping an ENOENT (or EACCES on Windows) error
* can be ignored, what we do on error is absorb pending requests
* and then retry. Since mdunlink() queues a "revoke" message
* before actually unlinking, the fsync request is guaranteed to
* be marked canceled after the absorb if it really was this case.
* have been deleted (unlinked) by the time we get to them. Rather
* than just hoping an ENOENT (or EACCES on Windows) error can be
* ignored, what we do on error is absorb pending requests and
* then retry. Since mdunlink() queues a "revoke" message before
* actually unlinking, the fsync request is guaranteed to be
* marked canceled after the absorb if it really was this case.
* DROP DATABASE likewise has to tell us to forget fsync requests
* before it starts deletions.
*/
for (failures = 0; ; failures++) /* loop exits at "break" */
for (failures = 0;; failures++) /* loop exits at "break" */
{
SMgrRelation reln;
MdfdVec *seg;
@@ -1052,13 +1053,13 @@ mdsync(void)
/*
* It is possible that the relation has been dropped or
* truncated since the fsync request was entered. Therefore,
* allow ENOENT, but only if we didn't fail already on
* this file. This applies both during _mdfd_getseg() and
* during FileSync, since fd.c might have closed the file
* behind our back.
* allow ENOENT, but only if we didn't fail already on this
* file. This applies both during _mdfd_getseg() and during
* FileSync, since fd.c might have closed the file behind our
* back.
*/
seg = _mdfd_getseg(reln,
entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
false, EXTENSION_RETURN_NULL);
if (seg != NULL &&
FileSync(seg->mdfd_vfd) >= 0)
@@ -1066,8 +1067,8 @@ mdsync(void)

/*
* XXX is there any point in allowing more than one retry?
* Don't see one at the moment, but easy to change the
* test here if so.
* Don't see one at the moment, but easy to change the test
* here if so.
*/
if (!FILE_POSSIBLY_DELETED(errno) ||
failures > 0)
@@ -1091,22 +1092,22 @@ mdsync(void)
* Absorb incoming requests and check to see if canceled.
*/
AbsorbFsyncRequests();
absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */

if (entry->canceled)
break;
} /* end retry loop */
} /* end retry loop */
}

/*
* If we get here, either we fsync'd successfully, or we don't have
* to because enableFsync is off, or the entry is (now) marked
* canceled. Okay to delete it.
* If we get here, either we fsync'd successfully, or we don't have to
* because enableFsync is off, or the entry is (now) marked canceled.
* Okay to delete it.
*/
if (hash_search(pendingOpsTable, &entry->tag,
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "pendingOpsTable corrupted");
} /* end loop over hashtable entries */
} /* end loop over hashtable entries */

/* Flag successful completion of mdsync */
mdsync_in_progress = false;
@@ -1129,13 +1130,13 @@ mdsync(void)
void
mdpreckpt(void)
{
ListCell *cell;
ListCell *cell;

/*
* In case the prior checkpoint wasn't completed, stamp all entries in
* the list with the current cycle counter. Anything that's in the
* list at the start of checkpoint can surely be deleted after the
* checkpoint is finished, regardless of when the request was made.
* In case the prior checkpoint wasn't completed, stamp all entries in the
* list with the current cycle counter. Anything that's in the list at
* the start of checkpoint can surely be deleted after the checkpoint is
* finished, regardless of when the request was made.
*/
foreach(cell, pendingUnlinks)
{
@@ -1145,8 +1146,8 @@ mdpreckpt(void)
}

/*
* Any unlink requests arriving after this point will be assigned the
* next cycle counter, and won't be unlinked until next checkpoint.
* Any unlink requests arriving after this point will be assigned the next
* cycle counter, and won't be unlinked until next checkpoint.
*/
mdckpt_cycle_ctr++;
}
@@ -1162,11 +1163,11 @@ mdpostckpt(void)
while (pendingUnlinks != NIL)
{
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) linitial(pendingUnlinks);
char *path;
char *path;

/*
* New entries are appended to the end, so if the entry is new
* we've reached the end of old entries.
* New entries are appended to the end, so if the entry is new we've
* reached the end of old entries.
*/
if (entry->cycle_ctr == mdsync_cycle_ctr)
break;
@@ -1222,11 +1223,11 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
if (FileSync(seg->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
}
}

@@ -1272,7 +1273,7 @@ register_unlink(RelFileNode rnode)
* - FORGET_RELATION_FSYNC means to cancel pending fsyncs for a relation
* - FORGET_DATABASE_FSYNC means to cancel pending fsyncs for a whole database
* - UNLINK_RELATION_REQUEST is a request to delete the file after the next
* checkpoint.
* checkpoint.
*
* (Handling the FORGET_* requests is a tad slow because the hash table has
* to be searched linearly, but it doesn't seem worth rethinking the table
@@ -1351,9 +1352,10 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
entry->canceled = false;
entry->cycle_ctr = mdsync_cycle_ctr;
}

/*
* NB: it's intentional that we don't change cycle_ctr if the entry
* already exists. The fsync request must be treated as old, even
* already exists. The fsync request must be treated as old, even
* though the new request will be satisfied too by any subsequent
* fsync.
*
@@ -1361,8 +1363,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
* act just as though it wasn't there. The only case where this could
* happen would be if a file had been deleted, we received but did not
* yet act on the cancel request, and the same relfilenode was then
* assigned to a new file. We mustn't lose the new request, but
* it should be considered new not old.
* assigned to a new file. We mustn't lose the new request, but it
* should be considered new not old.
*/
}
}
@@ -1385,16 +1387,17 @@ ForgetRelationFsyncRequests(RelFileNode rnode)
* message, we have to sleep and try again ... ugly, but hopefully
* won't happen often.
*
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
* an error would leave the no-longer-used file still present on
* disk, which would be bad, so I'm inclined to assume that the
* bgwriter will always empty the queue soon.
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with an
* error would leave the no-longer-used file still present on disk,
* which would be bad, so I'm inclined to assume that the bgwriter
* will always empty the queue soon.
*/
while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */

/*
* Note we don't wait for the bgwriter to actually absorb the
* revoke message; see mdsync() for the implications.
* Note we don't wait for the bgwriter to actually absorb the revoke
* message; see mdsync() for the implications.
*/
}
}
@@ -1511,24 +1514,24 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
if (v->mdfd_chain == NULL)
{
/*
* Normally we will create new segments only if authorized by
* the caller (i.e., we are doing mdextend()). But when doing
* WAL recovery, create segments anyway; this allows cases such as
* Normally we will create new segments only if authorized by the
* caller (i.e., we are doing mdextend()). But when doing WAL
* recovery, create segments anyway; this allows cases such as
* replaying WAL data that has a write into a high-numbered
* segment of a relation that was later deleted. We want to go
* ahead and create the segments so we can finish out the replay.
*
* We have to maintain the invariant that segments before the
* last active segment are of size RELSEG_SIZE; therefore, pad
* them out with zeroes if needed. (This only matters if caller
* is extending the relation discontiguously, but that can happen
* in hash indexes.)
* We have to maintain the invariant that segments before the last
* active segment are of size RELSEG_SIZE; therefore, pad them out
* with zeroes if needed. (This only matters if caller is
* extending the relation discontiguously, but that can happen in
* hash indexes.)
*/
if (behavior == EXTENSION_CREATE || InRecovery)
{
if (_mdnblocks(reln, v) < RELSEG_SIZE)
{
char *zerobuf = palloc0(BLCKSZ);
char *zerobuf = palloc0(BLCKSZ);

mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
zerobuf, isTemp);
@@ -1575,11 +1578,11 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg)
if (len < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
seg->mdfd_segno,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
/* note that this calculation will ignore any partial block at EOF */
return (BlockNumber) (len / BLCKSZ);
}

@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.107 2007/11/15 20:36:40 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.108 2007/11/15 21:14:38 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,13 +53,13 @@ typedef struct f_smgr
char *buffer, bool isTemp);
BlockNumber (*smgr_nblocks) (SMgrRelation reln);
void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
bool isTemp);
void (*smgr_immedsync) (SMgrRelation reln);
void (*smgr_commit) (void); /* may be NULL */
void (*smgr_abort) (void); /* may be NULL */
void (*smgr_pre_ckpt) (void); /* may be NULL */
void (*smgr_sync) (void); /* may be NULL */
void (*smgr_post_ckpt) (void); /* may be NULL */
void (*smgr_commit) (void); /* may be NULL */
void (*smgr_abort) (void); /* may be NULL */
void (*smgr_pre_ckpt) (void); /* may be NULL */
void (*smgr_sync) (void); /* may be NULL */
void (*smgr_post_ckpt) (void); /* may be NULL */
} f_smgr;


@@ -848,8 +848,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
/*
* Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in
* XLogOpenRelation, we prefer to recreate the rel and replay the
* log as best we can until the drop is seen.
* XLogOpenRelation, we prefer to recreate the rel and replay the log
* as best we can until the drop is seen.
*/
smgrcreate(reln, false, true);
