Replace the BufMgrLock with separate locks on the lookup hashtable and
the freelist, plus per-buffer spinlocks that protect access to individual shared buffer headers. This requires abandoning a global freelist (since the freelist is a global contention point), which shoots down ARC and 2Q as well as plain LRU management. Adopt a clock sweep algorithm instead. Preliminary results show substantial improvement in multi-backend situations.
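As context for the diff below: the heart of the change is that buffer state becomes protected per buffer instead of by one global lock. A minimal C sketch of the idea, assuming the buffer-header fields this commit introduces (buf_hdr_lock, refcount, usage_count); this is an illustration, not the actual bufmgr.c code:

/* Sketch: pin/unpin touch only this buffer's spinlock, never a global lock. */
static void
SketchPinBuffer(BufferDesc *buf)
{
	SpinLockAcquire(&buf->buf_hdr_lock);
	buf->refcount++;
	SpinLockRelease(&buf->buf_hdr_lock);
}

static void
SketchUnpinBuffer(BufferDesc *buf)
{
	SpinLockAcquire(&buf->buf_hdr_lock);
	buf->refcount--;
	if (buf->usage_count < BM_MAX_USAGE_COUNT)
		buf->usage_count++;		/* feeds the clock-sweep replacement policy */
	SpinLockRelease(&buf->buf_hdr_lock);
}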
src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.244 2005/01/10 20:02:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.245 2005/03/04 20:21:05 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1060,7 +1060,6 @@ setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid)
 		/* Send out shared cache inval if necessary */
 		if (!IsBootstrapProcessingMode())
 			CacheInvalidateHeapTuple(pg_class, tuple);
-		BufferSync(-1, -1);
 	}
 	else if (dirty)
 	{
src/backend/commands/dbcommands.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.151 2005/02/26 18:43:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.152 2005/03/04 20:21:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -339,7 +339,7 @@ createdb(const CreatedbStmt *stmt)
 	 * up-to-date for the copy. (We really only need to flush buffers for
 	 * the source database, but bufmgr.c provides no API for that.)
 	 */
-	BufferSync(-1, -1);
+	BufferSync();
 
 	/*
 	 * Close virtual file descriptors so the kernel has more available for
@@ -1201,7 +1201,7 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
 	 * up-to-date for the copy. (We really only need to flush buffers for
 	 * the source database, but bufmgr.c provides no API for that.)
 	 */
-	BufferSync(-1, -1);
+	BufferSync();
 
 #ifndef WIN32
 
src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.302 2005/02/26 18:43:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.303 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,7 +36,6 @@
 #include "commands/vacuum.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
-#include "storage/buf_internals.h"
 #include "storage/freespace.h"
 #include "storage/sinval.h"
 #include "storage/smgr.h"
src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.14 2005/02/19 23:16:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.15 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -116,9 +116,6 @@ static BgWriterShmemStruct *BgWriterShmem;
  * GUC parameters
  */
 int			BgWriterDelay = 200;
-int			BgWriterPercent = 1;
-int			BgWriterMaxPages = 100;
-
 int			CheckPointTimeout = 300;
 int			CheckPointWarning = 30;
 
@@ -274,7 +271,6 @@ BackgroundWriterMain(void)
 	bool		force_checkpoint = false;
 	time_t		now;
 	int			elapsed_secs;
-	int			n;
 	long		udelay;
 
 	/*
@@ -365,16 +361,13 @@
 			 * checkpoints happen at a predictable spacing.
 			 */
 			last_checkpoint_time = now;
-
-			/* Nap for configured time before rechecking */
-			n = 1;
 		}
 		else
-			n = BufferSync(BgWriterPercent, BgWriterMaxPages);
+			BgBufferSync();
 
 		/*
-		 * Nap for the configured time or sleep for 10 seconds if there
-		 * was nothing to do at all.
+		 * Nap for the configured time, or sleep for 10 seconds if there
+		 * is no bgwriter activity configured.
 		 *
 		 * On some platforms, signals won't interrupt the sleep. To ensure
 		 * we respond reasonably promptly when someone signals us, break
@@ -383,7 +376,11 @@ BackgroundWriterMain(void)
 		 *
 		 * We absorb pending requests after each short sleep.
 		 */
-		udelay = ((n > 0) ? BgWriterDelay : 10000) * 1000L;
+		if ((bgwriter_all_percent > 0.0 && bgwriter_all_maxpages > 0) ||
+			(bgwriter_lru_percent > 0.0 && bgwriter_lru_maxpages > 0))
+			udelay = BgWriterDelay * 1000L;
+		else
+			udelay = 10000000L;
 		while (udelay > 1000000L)
 		{
 			if (got_SIGHUP || checkpoint_requested || shutdown_requested)
src/backend/storage/buffer/README
@@ -1,12 +1,12 @@
-$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.7 2004/04/19 23:27:17 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.8 2005/03/04 20:21:06 tgl Exp $
 
 Notes about shared buffer access rules
 --------------------------------------
 
 There are two separate access control mechanisms for shared disk buffers:
-reference counts (a/k/a pin counts) and buffer locks. (Actually, there's
-a third level of access control: one must hold the appropriate kind of
-lock on a relation before one can legally access any page belonging to
+reference counts (a/k/a pin counts) and buffer content locks. (Actually,
+there's a third level of access control: one must hold the appropriate kind
+of lock on a relation before one can legally access any page belonging to
 the relation. Relation-level locks are not discussed here.)
 
 Pins: one must "hold a pin on" a buffer (increment its reference count)
@@ -26,7 +26,7 @@ handled by waiting to obtain the relation-level lock, which is why you'd
 better hold one first.) Pins may not be held across transaction
 boundaries, however.
 
-Buffer locks: there are two kinds of buffer locks, shared and exclusive,
+Buffer content locks: there are two kinds of buffer lock, shared and exclusive,
 which act just as you'd expect: multiple backends can hold shared locks on
 the same buffer, but an exclusive lock prevents anyone else from holding
 either shared or exclusive lock. (These can alternatively be called READ
@@ -38,12 +38,12 @@ the same buffer. One must pin a buffer before trying to lock it.
 Buffer access rules:
 
 1. To scan a page for tuples, one must hold a pin and either shared or
-exclusive lock. To examine the commit status (XIDs and status bits) of
-a tuple in a shared buffer, one must likewise hold a pin and either shared
+exclusive content lock. To examine the commit status (XIDs and status bits)
+of a tuple in a shared buffer, one must likewise hold a pin and either shared
 or exclusive lock.
 
 2. Once one has determined that a tuple is interesting (visible to the
-current transaction) one may drop the buffer lock, yet continue to access
+current transaction) one may drop the content lock, yet continue to access
 the tuple's data for as long as one holds the buffer pin. This is what is
 typically done by heap scans, since the tuple returned by heap_fetch
 contains a pointer to tuple data in the shared buffer. Therefore the
@@ -52,9 +52,9 @@ change, but that is assumed not to matter after the initial determination
 of visibility is made.
 
 3. To add a tuple or change the xmin/xmax fields of an existing tuple,
-one must hold a pin and an exclusive lock on the containing buffer.
+one must hold a pin and an exclusive content lock on the containing buffer.
 This ensures that no one else might see a partially-updated state of the
-tuple.
+tuple while they are doing visibility checks.
 
 4. It is considered OK to update tuple commit status bits (ie, OR the
 values HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAP_XMAX_COMMITTED, or
@@ -76,7 +76,7 @@ no other backend can be holding a reference to an existing tuple that it
 might expect to examine again. Note that another backend might pin the
 buffer (increment the refcount) while one is performing the cleanup, but
 it won't be able to actually examine the page until it acquires shared
-or exclusive lock.
+or exclusive content lock.
 
 
 VACUUM FULL ignores rule #5, because it instead acquires exclusive lock at
@@ -97,149 +97,142 @@ for VACUUM's use, since we don't allow multiple VACUUMs concurrently on a
 single relation anyway.
 
 
-Buffer replacement strategy interface
--------------------------------------
+Buffer manager's internal locking
+---------------------------------
 
-The file freelist.c contains the buffer cache replacement strategy.
-The interface to the strategy is:
+Before PostgreSQL 8.1, all operations of the shared buffer manager itself
+were protected by a single system-wide lock, the BufMgrLock, which
+unsurprisingly proved to be a source of contention. The new locking scheme
+avoids grabbing system-wide exclusive locks in common code paths. It works
+like this:
 
-BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
-                                 int *cdb_found_index)
+* There is a system-wide LWLock, the BufMappingLock, that notionally
+protects the mapping from buffer tags (page identifiers) to buffers.
+(Physically, it can be thought of as protecting the hash table maintained
+by buf_table.c.) To look up whether a buffer exists for a tag, it is
+sufficient to obtain share lock on the BufMappingLock. Note that one
+must pin the found buffer, if any, before releasing the BufMappingLock.
+To alter the page assignment of any buffer, one must hold exclusive lock
+on the BufMappingLock. This lock must be held across adjusting the buffer's
+header fields and changing the buf_table hash table. The only common
+operation that needs exclusive lock is reading in a page that was not
+in shared buffers already, which will require at least a kernel call
+and usually a wait for I/O, so it will be slow anyway.
 
-This is always the first call made by the buffer manager to check if a disk
-page is in memory. If so, the function returns the buffer descriptor and no
-further action is required. If the page is not in memory,
-StrategyBufferLookup() returns NULL.
+* A separate system-wide LWLock, the BufFreelistLock, provides mutual
+exclusion for operations that access the buffer free list or select
+buffers for replacement. This is always taken in exclusive mode since
+there are no read-only operations on those data structures. The buffer
+management policy is designed so that BufFreelistLock need not be taken
+except in paths that will require I/O, and thus will be slow anyway.
+(Details appear below.) It is never necessary to hold the BufMappingLock
+and the BufFreelistLock at the same time.
 
-The flag recheck tells the strategy that this is a second lookup after
-flushing a dirty block. If the buffer manager has to evict another buffer,
-it will release the bufmgr lock while doing the write IO. During this time,
-another backend could possibly fault in the same page this backend is after,
-so we have to check again after the IO is done if the page is in memory now.
+* Each buffer header contains a spinlock that must be taken when examining
+or changing fields of that buffer header. This allows operations such as
+ReleaseBuffer to make local state changes without taking any system-wide
+lock. We use a spinlock, not an LWLock, since there are no cases where
+the lock needs to be held for more than a few instructions.
 
-*cdb_found_index is set to the index of the found CDB, or -1 if none.
-This is not intended to be used by the caller, except to pass to
-StrategyReplaceBuffer().
+Note that a buffer header's spinlock does not control access to the data
+held within the buffer. Each buffer header also contains an LWLock, the
+"buffer content lock", that *does* represent the right to access the data
+in the buffer. It is used per the rules above.
 
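As an illustration of the lookup rule just stated, a hedged C sketch of the common read path. PinBufferSketch is a hypothetical stand-in for bufmgr.c's internal pinning logic; the other names (BufMappingLock, BufTableLookup, BufferDescriptors) are from this commit:

static BufferDesc *
LookupSketch(BufferTag *tag)
{
	int			buf_id;
	BufferDesc *buf = NULL;

	LWLockAcquire(BufMappingLock, LW_SHARED);
	buf_id = BufTableLookup(tag);	/* -1 if no buffer holds this tag */
	if (buf_id >= 0)
	{
		buf = &BufferDescriptors[buf_id];
		PinBufferSketch(buf);		/* pin before releasing the mapping lock */
	}
	LWLockRelease(BufMappingLock);
	return buf;						/* NULL: caller must take the I/O path */
}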
-BufferDesc *StrategyGetBuffer(int *cdb_replace_index)
-
-The buffer manager calls this function to get an unpinned cache buffer whose
-content can be evicted. The returned buffer might be empty, clean or dirty.
-
-The returned buffer is only a candidate for replacement. It is possible that
-while the buffer is being written, another backend finds and modifies it, so
-that it is dirty again. The buffer manager will then have to call
-StrategyGetBuffer() again to ask for another candidate.
-
-*cdb_replace_index is set to the index of the candidate CDB, or -1 if none
-(meaning we are using a previously free buffer). This is not intended to be
-used by the caller, except to pass to StrategyReplaceBuffer().
-
-void StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
-                           int cdb_found_index, int cdb_replace_index)
-
-Called by the buffer manager at the time it is about to change the association
-of a buffer with a disk page.
-
-Before this call, StrategyBufferLookup() still has to find the buffer under
-its old tag, even if it was returned by StrategyGetBuffer() as a candidate
-for replacement.
-
-After this call, this buffer must be returned for a lookup of the new page
-identified by *newTag.
-
-cdb_found_index and cdb_replace_index must be the auxiliary values
-returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
-
-void StrategyInvalidateBuffer(BufferDesc *buf)
-
-Called by the buffer manager to inform the strategy that the content of this
-buffer is being thrown away. This happens for example in the case of dropping
-a relation. The buffer must be clean and unpinned on call.
-
-If the buffer was associated with a disk page, StrategyBufferLookup()
-must not return it for this page after the call.
-
-void StrategyHintVacuum(bool vacuum_active)
-
-Because VACUUM reads all relations of the entire database through the buffer
-manager, it can greatly disturb the buffer replacement strategy. This function
-is used by VACUUM to inform the strategy that subsequent buffer lookups are
-(or are not) caused by VACUUM scanning relations.
+There is yet another set of per-buffer LWLocks, the io_in_progress locks,
+that are used to wait for I/O on a buffer to complete. The process doing
+a read or write takes exclusive lock for the duration, and processes that
+need to wait for completion try to take shared locks (which they release
+immediately upon obtaining). XXX on systems where an LWLock represents
+nontrivial resources, it's fairly annoying to need so many locks. Possibly
+we could use per-backend LWLocks instead (a buffer header would then contain
+a field to show which backend is doing its I/O).
 
 
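The io_in_progress protocol just described reduces to a tiny wait primitive. A sketch, using the io_in_progress_lock field initialized in buf_init.c later in this diff:

static void
WaitIOSketch(BufferDesc *buf)
{
	/* queue up behind the in-progress I/O, then let go immediately */
	LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
	LWLockRelease(buf->io_in_progress_lock);
}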
 Buffer replacement strategy
 ---------------------------
 
-The buffer replacement strategy actually used in freelist.c is a version of
-the Adaptive Replacement Cache (ARC) specially tailored for PostgreSQL.
+There is a "free list" of buffers that are prime candidates for replacement.
+In particular, buffers that are completely free (contain no valid page) are
+always in this list. We may also throw buffers into this list if we
+consider their pages unlikely to be needed soon. The list is singly-linked
+using fields in the buffer headers; we maintain head and tail pointers in
+global variables. (Note: although the list links are in the buffer headers,
+they are considered to be protected by the BufFreelistLock, not the
+buffer-header spinlocks.) To choose a victim buffer to recycle when there
+are no free buffers available, we use a simple clock-sweep algorithm, which
+avoids the need to take system-wide locks during common operations. It
+works like this:
 
-The algorithm works as follows:
+Each buffer header contains a usage counter, which is incremented (up to a
+small limit value) whenever the buffer is unpinned. (This requires only the
+buffer header spinlock, which would have to be taken anyway to decrement the
+buffer reference count, so it's nearly free.)
 
-C is the size of the cache in number of pages (a/k/a shared_buffers or
-NBuffers). ARC uses 2*C Cache Directory Blocks (CDB). A cache directory block
-is always associated with one unique file page. It may point to one shared
-buffer, or may indicate that the file page is not in a buffer but has been
-accessed recently.
+The "clock hand" is a buffer index, NextVictimBuffer, that moves circularly
+through all the available buffers. NextVictimBuffer is protected by the
+BufFreelistLock.
 
-All CDB entries are managed in 4 LRU lists named T1, T2, B1 and B2. The T1 and
-T2 lists are the "real" cache entries, linking a file page to a memory buffer
-where the page is currently cached. Consequently T1len+T2len <= C. B1 and B2
-are ghost cache directories that extend T1 and T2 so that the strategy
-remembers pages longer. The strategy tries to keep B1len+T1len and B2len+T2len
-both at C. T1len and T2len vary over the runtime depending on the lookup
-pattern and its resulting cache hits. The desired size of T1len is called
-T1target.
+The algorithm for a process that needs to obtain a victim buffer is:
 
-Assuming we have a full cache, one of 5 cases happens on a lookup:
+1. Obtain BufFreelistLock.
 
-MISS	On a cache miss, depending on T1target and the actual T1len
-	the LRU buffer of either T1 or T2 is evicted. Its CDB is removed
-	from the T list and added as MRU of the corresponding B list.
-	The now free buffer is replaced with the requested page
-	and added as MRU of T1.
+2. If buffer free list is nonempty, remove its head buffer. If the buffer
+is pinned or has a nonzero usage count, it cannot be used; ignore it and
+return to the start of step 2. Otherwise, pin the buffer, release
+BufFreelistLock, and return the buffer.
 
-T1 hit	The T1 CDB is moved to the MRU position of the T2 list.
+3. Otherwise, select the buffer pointed to by NextVictimBuffer, and
+circularly advance NextVictimBuffer for next time.
 
-T2 hit	The T2 CDB is moved to the MRU position of the T2 list.
+4. If the selected buffer is pinned or has a nonzero usage count, it cannot
+be used. Decrement its usage count (if nonzero) and return to step 3 to
+examine the next buffer.
 
-B1 hit	This means that a buffer that was evicted from the T1
-	list is now requested again, indicating that T1target is
-	too small (otherwise it would still be in T1 and thus in
-	memory). The strategy raises T1target, evicts a buffer
-	depending on T1target and T1len and places the CDB at
-	MRU of T2.
+5. Pin the selected buffer, release BufFreelistLock, and return the buffer.
 
-B2 hit	This means the opposite of B1, the T2 list is probably too
-	small. So the strategy lowers T1target, evicts a buffer
-	and places the CDB at MRU of T2.
+(Note that if the selected buffer is dirty, we will have to write it out
+before we can recycle it; if someone else pins the buffer meanwhile we will
+have to give up and try another buffer. This however is not a concern
+of the basic select-a-victim-buffer algorithm.)
 
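A hedged C sketch of the five steps above. The real shared-memory implementation lives in freelist.c (its diff is not shown on this page); helper names ending in Sketch are hypothetical, and buffer-header spinlocking is elided for brevity:

static BufferDesc *
ClockSweepSketch(void)
{
	BufferDesc *buf;

	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);		/* step 1 */

	while ((buf = RemoveFreelistHeadSketch()) != NULL)	/* step 2 */
	{
		if (buf->refcount == 0 && buf->usage_count == 0)
		{
			PinBufferSketch(buf);
			LWLockRelease(BufFreelistLock);
			return buf;
		}
		/* pinned or recently used: skip it, keep draining the free list */
	}

	for (;;)						/* steps 3-5 */
	{
		buf = &BufferDescriptors[NextVictimBuffer];
		if (++NextVictimBuffer >= NBuffers)
			NextVictimBuffer = 0;	/* circularly advance the clock hand */

		if (buf->refcount == 0 && buf->usage_count == 0)
		{
			PinBufferSketch(buf);	/* step 5 */
			LWLockRelease(BufFreelistLock);
			return buf;
		}
		if (buf->usage_count > 0)
			buf->usage_count--;		/* step 4: age it and move on */
	}
}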
-Thus, every page that is found on lookup in any of the four lists
-ends up as the MRU of the T2 list. The T2 list therefore is the
-"frequency" cache, holding frequently requested pages.
+A special provision is that while running VACUUM, a backend does not
+increment the usage count on buffers it accesses. In fact, if ReleaseBuffer
+sees that it is dropping the pin count to zero and the usage count is zero,
+then it appends the buffer to the tail of the free list. (This implies that
+VACUUM, but only VACUUM, must take the BufFreelistLock during ReleaseBuffer;
+this shouldn't create much of a contention problem.) This provision
+encourages VACUUM to work in a relatively small number of buffers rather
+than blowing out the entire buffer cache. It is reasonable since a page
+that has been touched only by VACUUM is unlikely to be needed again soon.
 
-Every page that is seen for the first time ends up as the MRU of the T1
-list. The T1 list is the "recency" cache, holding recent newcomers.
-
-The tailoring done for PostgreSQL has to do with the way the query executor
-works. A typical UPDATE or DELETE first scans the relation, searching for the
-tuples and then calls heap_update() or heap_delete(). This causes at least 2
-lookups for the block in the same statement. In the case of multiple matches
-in one block even more often. As a result, every block touched in an UPDATE or
-DELETE would directly jump into the T2 cache, which is wrong. To prevent this
-the strategy remembers which transaction added a buffer to the T1 list and
-will not promote it from there into the T2 cache during the same transaction.
-
-Another specialty is the change of the strategy during VACUUM. Lookups during
-VACUUM do not represent application needs, and do not suggest that the page
-will be hit again soon, so it would be wrong to change the cache balance
-T1target due to that or to cause massive cache evictions. Therefore, a page
-read in to satisfy vacuum is placed at the LRU position of the T1 list, for
-immediate reuse. Also, if we happen to get a hit on a CDB entry during
-VACUUM, we do not promote the page above its current position in the list.
 Since VACUUM usually requests many pages very fast, the effect of this is that
 it will get back the very buffers it filled and possibly modified on the next
 call and will therefore do its work in a few shared memory buffers, while
 being able to use whatever it finds in the cache already. This also implies
 that most of the write traffic caused by a VACUUM will be done by the VACUUM
 itself and not pushed off onto other processes.
+
+
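A hedged sketch of the VACUUM-specific unpin behavior just described (hypothetical helper names; the plumbing that tells the buffer manager a VACUUM is running is simplified to a flag):

static void
SketchReleaseBuffer(BufferDesc *buf, bool called_by_vacuum)
{
	SpinLockAcquire(&buf->buf_hdr_lock);
	buf->refcount--;
	if (!called_by_vacuum)
	{
		/* normal backends feed the clock sweep */
		if (buf->usage_count < BM_MAX_USAGE_COUNT)
			buf->usage_count++;
	}
	else if (buf->refcount == 0 && buf->usage_count == 0)
	{
		SpinLockRelease(&buf->buf_hdr_lock);
		/* only VACUUM pays the global-lock cost here */
		LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
		AppendToFreelistTailSketch(buf);	/* offer the page for reuse */
		LWLockRelease(BufFreelistLock);
		return;
	}
	SpinLockRelease(&buf->buf_hdr_lock);
}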
+Background writer's processing
+------------------------------
+
+The background writer is designed to write out pages that are likely to be
+recycled soon, thereby offloading the writing work from active backends.
+To do this, it scans forward circularly from the current position of
+NextVictimBuffer (which it does not change!), looking for buffers that are
+dirty and not pinned nor marked with a positive usage count. It pins,
+writes, and releases any such buffer.
+
+If we can assume that reading NextVictimBuffer is an atomic action, then
+the writer doesn't even need to take the BufFreelistLock in order to look
+for buffers to write; it needs only to spinlock each buffer header for long
+enough to check the dirtybit. Even without that assumption, the writer
+only needs to take the lock long enough to read the variable value, not
+while scanning the buffers. (This is a very substantial improvement in
+the contention cost of the writer compared to PG 8.0.)
+
+During a checkpoint, the writer's strategy must be to write every dirty
+buffer (pinned or not!). We may as well make it start this scan from
+NextVictimBuffer, however, so that the first-to-be-written pages are the
+ones that backends might otherwise have to write for themselves soon.
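A hedged sketch of the non-checkpoint scan just described, bounded by an lru_maxpages-style limit. The clock hand is only read, never advanced; helper names ending in Sketch are hypothetical:

static int
BgLruScanSketch(int max_pages)
{
	int		written = 0;
	int		next = ReadNextVictimBufferSketch();	/* read the hand once */
	int		scanned;

	for (scanned = 0; scanned < NBuffers && written < max_pages; scanned++)
	{
		BufferDesc *buf = &BufferDescriptors[next];

		if (++next >= NBuffers)
			next = 0;

		/* a quick peek under the buffer-header spinlock */
		SpinLockAcquire(&buf->buf_hdr_lock);
		if ((buf->flags & BM_DIRTY) &&
			buf->refcount == 0 && buf->usage_count == 0)
		{
			SpinLockRelease(&buf->buf_hdr_lock);
			PinWriteReleaseSketch(buf);		/* pin, flush, unpin */
			written++;
		}
		else
			SpinLockRelease(&buf->buf_hdr_lock);
	}
	return written;
}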
src/backend/storage/buffer/buf_init.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.71 2005/02/03 23:29:11 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.72 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,8 @@ BufferDesc *BufferDescriptors;
 Block	   *BufferBlockPointers;
 int32	   *PrivateRefCount;
 
+static char *BufferBlocks;
+
 /* statistics counters */
 long int	ReadBufferCount;
 long int	ReadLocalBufferCount;
@@ -50,16 +52,11 @@ long int	LocalBufferFlushCount;
  *
  * Synchronization/Locking:
  *
- * BufMgrLock lock -- must be acquired before manipulating the
- *		buffer search datastructures (lookup/freelist, as well as the
- *		flag bits of any buffer). Must be released
- *		before exit and before doing any IO.
- *
  * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
  *		It must be set when an IO is initiated and cleared at
  *		the end of the IO. It is there to make sure that one
  *		process doesn't start to use a buffer while another is
- *		faulting it in. see IOWait/IOSignal.
+ *		faulting it in. see WaitIO and related routines.
  *
  * refcount -- Counts the number of processes holding pins on a buffer.
  *		A buffer is pinned during IO and immediately after a BufferAlloc().
@@ -85,10 +82,8 @@ long int	LocalBufferFlushCount;
 void
 InitBufferPool(void)
 {
-	char	   *BufferBlocks;
 	bool		foundBufs,
 				foundDescs;
-	int			i;
 
 	BufferDescriptors = (BufferDesc *)
 		ShmemInitStruct("Buffer Descriptors",
@@ -102,52 +97,42 @@ InitBufferPool(void)
 	{
 		/* both should be present or neither */
 		Assert(foundDescs && foundBufs);
+		/* note: this path is only taken in EXEC_BACKEND case */
 	}
 	else
 	{
 		BufferDesc *buf;
-		char	   *block;
-
-		/*
-		 * It's probably not really necessary to grab the lock --- if
-		 * there's anyone else attached to the shmem at this point, we've
-		 * got problems.
-		 */
-		LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+		int			i;
 
 		buf = BufferDescriptors;
-		block = BufferBlocks;
 
 		/*
 		 * Initialize all the buffer headers.
 		 */
-		for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
+		for (i = 0; i < NBuffers; buf++, i++)
 		{
-			Assert(ShmemIsValid((unsigned long) block));
-
-			/*
-			 * The bufNext fields link together all totally-unused buffers.
-			 * Subsequent management of this list is done by
-			 * StrategyGetBuffer().
-			 */
-			buf->bufNext = i + 1;
-
 			CLEAR_BUFFERTAG(buf->tag);
+			buf->flags = 0;
+			buf->usage_count = 0;
+			buf->refcount = 0;
+			buf->wait_backend_id = 0;
+
+			SpinLockInit(&buf->buf_hdr_lock);
+
 			buf->buf_id = i;
 
-			buf->data = MAKE_OFFSET(block);
-			buf->flags = 0;
-			buf->refcount = 0;
+			/*
+			 * Initially link all the buffers together as unused.
+			 * Subsequent management of this list is done by freelist.c.
+			 */
+			buf->freeNext = i + 1;
+
 			buf->io_in_progress_lock = LWLockAssign();
-			buf->cntx_lock = LWLockAssign();
-			buf->cntxDirty = false;
-			buf->wait_backend_id = 0;
+			buf->content_lock = LWLockAssign();
 		}
 
 		/* Correct last entry of linked list */
-		BufferDescriptors[NBuffers - 1].bufNext = -1;
-
-		LWLockRelease(BufMgrLock);
+		BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
 	}
 
 	/* Init other shared buffer-management stuff */
@@ -162,12 +147,13 @@ InitBufferPool(void)
  * buffer pool.
  *
  * NB: this is called before InitProcess(), so we do not have a PGPROC and
- * cannot do LWLockAcquire; hence we can't actually access the bufmgr's
+ * cannot do LWLockAcquire; hence we can't actually access stuff in
  * shared memory yet. We are only initializing local data here.
  */
 void
 InitBufferPoolAccess(void)
 {
+	char	   *block;
 	int			i;
 
 	/*
@@ -179,12 +165,18 @@ InitBufferPoolAccess(void)
 					sizeof(*PrivateRefCount));
 
 	/*
-	 * Convert shmem offsets into addresses as seen by this process. This
-	 * is just to speed up the BufferGetBlock() macro. It is OK to do this
-	 * without any lock since the data pointers never change.
+	 * Construct addresses for the individual buffer data blocks. We do
+	 * this just to speed up the BufferGetBlock() macro. (Since the
+	 * addresses should be the same in every backend, we could inherit
+	 * this data from the postmaster --- but in the EXEC_BACKEND case
+	 * that doesn't work.)
 	 */
+	block = BufferBlocks;
 	for (i = 0; i < NBuffers; i++)
-		BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
+	{
+		BufferBlockPointers[i] = (Block) block;
+		block += BLCKSZ;
+	}
 }
 
 /*
src/backend/storage/buffer/buf_table.c
@@ -3,12 +3,9 @@
  * buf_table.c
  *	  routines for mapping BufferTags to buffer indexes.
  *
- * NOTE: this module is called only by freelist.c, and the "buffer IDs"
- * it deals with are whatever freelist.c needs them to be; they may not be
- * directly equivalent to Buffer numbers.
- *
- * Note: all routines in this file assume that the BufMgrLock is held
- * by the caller, so no synchronization is needed.
+ * Note: the routines in this file do no locking of their own. The caller
+ * must hold a suitable lock on the BufMappingLock, as specified in the
+ * comments.
  *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -16,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.39 2005/02/03 23:29:11 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.40 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -74,17 +71,17 @@ InitBufTable(int size)
 /*
  * BufTableLookup
  *		Lookup the given BufferTag; return buffer ID, or -1 if not found
+ *
+ * Caller must hold at least share lock on BufMappingLock
  */
 int
 BufTableLookup(BufferTag *tagPtr)
 {
 	BufferLookupEnt *result;
 
-	if (tagPtr->blockNum == P_NEW)
-		return -1;
-
 	result = (BufferLookupEnt *)
 		hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
 
 	if (!result)
 		return -1;
 
@@ -93,14 +90,23 @@ BufTableLookup(BufferTag *tagPtr)
 
 /*
  * BufTableInsert
- *		Insert a hashtable entry for given tag and buffer ID
+ *		Insert a hashtable entry for given tag and buffer ID,
+ *		unless an entry already exists for that tag
+ *
+ * Returns -1 on successful insertion. If a conflicting entry exists
+ * already, returns the buffer ID in that entry.
+ *
+ * Caller must hold write lock on BufMappingLock
  */
-void
+int
 BufTableInsert(BufferTag *tagPtr, int buf_id)
 {
 	BufferLookupEnt *result;
 	bool		found;
 
+	Assert(buf_id >= 0);		/* -1 is reserved for not-in-table */
+	Assert(tagPtr->blockNum != P_NEW);	/* invalid tag */
+
 	result = (BufferLookupEnt *)
 		hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
 
@@ -109,15 +115,19 @@ BufTableInsert(BufferTag *tagPtr, int buf_id)
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory")));
 
-	if (found)					/* found something already in the table? */
-		elog(ERROR, "shared buffer hash table corrupted");
+	if (found)					/* found something already in the table */
+		return result->id;
 
 	result->id = buf_id;
+
+	return -1;
 }
 
 /*
  * BufTableDelete
  *		Delete the hashtable entry for given tag (which must exist)
+ *
+ * Caller must hold write lock on BufMappingLock
  */
 void
 BufTableDelete(BufferTag *tagPtr)
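The new BufTableInsert() return convention makes the caller responsible for resolving insert races. A hedged sketch of how a caller might use it (variable and helper names here are hypothetical):

	int			existing_id;

	LWLockAcquire(BufMappingLock, LW_EXCLUSIVE);
	existing_id = BufTableInsert(&newTag, victim_buf_id);
	if (existing_id >= 0)
	{
		/* lost the race: another backend already mapped newTag */
		AdoptExistingBufferSketch(existing_id);
	}
	else
	{
		/* won the race: finish retagging the victim buffer */
		RetagVictimBufferSketch(victim_buf_id, &newTag);
	}
	LWLockRelease(BufMappingLock);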
[File diff suppressed because it is too large]
[File diff suppressed because it is too large]
src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.62 2005/01/10 20:02:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.63 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,6 +24,10 @@
 
 /*#define LBDEBUG*/
 
+/* Note: this macro only works on local buffers, not shared ones! */
+#define LocalBufHdrGetBlock(bufHdr) \
+	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
+
 /* should be a GUC parameter some day */
 int			NLocBuffer = 64;
 
@@ -39,7 +43,7 @@ static int	nextFreeLocalBuf = 0;
  * allocate a local buffer. We do round robin allocation for now.
  *
  * API is similar to bufmgr.c's BufferAlloc, except that we do not need
- * to have the BufMgrLock since this is all local. Also, IO_IN_PROGRESS
+ * to do any locking since this is all local. Also, IO_IN_PROGRESS
  * does not get set.
 */
 BufferDesc *
@@ -47,11 +51,12 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 {
 	BufferTag	newTag;		/* identity of requested block */
 	int			i;
+	int			trycounter;
 	BufferDesc *bufHdr;
 
 	INIT_BUFFERTAG(newTag, reln, blockNum);
 
-	/* a low tech search for now -- not optimized for scans */
+	/* a low tech search for now -- should use a hashtable */
 	for (i = 0; i < NLocBuffer; i++)
 	{
 		bufHdr = &LocalBufferDescriptors[i];
@@ -81,32 +86,44 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 			RelationGetRelid(reln), blockNum, -nextFreeLocalBuf - 1);
 #endif
 
-	/* need to get a new buffer (round robin for now) */
-	bufHdr = NULL;
-	for (i = 0; i < NLocBuffer; i++)
+	/*
+	 * Need to get a new buffer. We use a clock sweep algorithm
+	 * (essentially the same as what freelist.c does now...)
+	 */
+	trycounter = NLocBuffer;
+	for (;;)
 	{
-		int			b = (nextFreeLocalBuf + i) % NLocBuffer;
+		int			b = nextFreeLocalBuf;
 
-		if (LocalRefCount[b] == 0)
+		if (++nextFreeLocalBuf >= NLocBuffer)
+			nextFreeLocalBuf = 0;
+
+		bufHdr = &LocalBufferDescriptors[b];
+
+		if (LocalRefCount[b] == 0 && bufHdr->usage_count == 0)
 		{
-			bufHdr = &LocalBufferDescriptors[b];
 			LocalRefCount[b]++;
 			ResourceOwnerRememberBuffer(CurrentResourceOwner,
-									BufferDescriptorGetBuffer(bufHdr));
-			nextFreeLocalBuf = (b + 1) % NLocBuffer;
+										BufferDescriptorGetBuffer(bufHdr));
 			break;
 		}
+
+		if (bufHdr->usage_count > 0)
+		{
+			bufHdr->usage_count--;
+			trycounter = NLocBuffer;
+		}
+		else if (--trycounter == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+					 errmsg("no empty local buffer available")));
 	}
-	if (bufHdr == NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
-				 errmsg("no empty local buffer available")));
 
 	/*
 	 * this buffer is not referenced but it might still be dirty. if
 	 * that's the case, write it out before reusing it!
 	 */
-	if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+	if (bufHdr->flags & BM_DIRTY)
 	{
 		SMgrRelation oreln;
 
@@ -116,7 +133,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 		/* And write... */
 		smgrwrite(oreln,
 				  bufHdr->tag.blockNum,
-				  (char *) MAKE_PTR(bufHdr->data),
+				  (char *) LocalBufHdrGetBlock(bufHdr),
 				  true);
 
 		LocalBufferFlushCount++;
@@ -129,7 +146,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 	 * use, so it's okay to do it (and possibly error out) before marking
 	 * the buffer as not dirty.
 	 */
-	if (bufHdr->data == (SHMEM_OFFSET) 0)
+	if (LocalBufHdrGetBlock(bufHdr) == NULL)
 	{
 		char	   *data = (char *) malloc(BLCKSZ);
 
@@ -138,17 +155,10 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
 
-		/*
-		 * This is a bit of a hack: bufHdr->data needs to be a shmem
-		 * offset for consistency with the shared-buffer case, so make it
-		 * one even though it's not really a valid shmem offset.
-		 */
-		bufHdr->data = MAKE_OFFSET(data);
-
 		/*
 		 * Set pointer for use by BufferGetBlock() macro.
 		 */
-		LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data;
+		LocalBufHdrGetBlock(bufHdr) = (Block) data;
 	}
 
 	/*
@@ -156,7 +166,8 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 	 */
 	bufHdr->tag = newTag;
 	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
-	bufHdr->cntxDirty = false;
+	bufHdr->flags |= BM_TAG_VALID;
+	bufHdr->usage_count = 0;
 
 	*foundPtr = FALSE;
 	return bufHdr;
@@ -170,6 +181,7 @@ void
 WriteLocalBuffer(Buffer buffer, bool release)
 {
 	int			bufid;
+	BufferDesc *bufHdr;
 
 	Assert(BufferIsLocal(buffer));
 
@@ -178,12 +190,18 @@ WriteLocalBuffer(Buffer buffer, bool release)
 #endif
 
 	bufid = -(buffer + 1);
-	LocalBufferDescriptors[bufid].flags |= BM_DIRTY;
+
+	Assert(LocalRefCount[bufid] > 0);
+
+	bufHdr = &LocalBufferDescriptors[bufid];
+	bufHdr->flags |= BM_DIRTY;
 
 	if (release)
 	{
-		Assert(LocalRefCount[bufid] > 0);
 		LocalRefCount[bufid]--;
+		if (LocalRefCount[bufid] == 0 &&
+			bufHdr->usage_count < BM_MAX_USAGE_COUNT)
+			bufHdr->usage_count++;
 		ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
 	}
 }
src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.253 2005/03/01 20:23:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.254 2005/03/04 20:21:06 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -77,7 +77,6 @@ extern bool Log_disconnections;
 extern DLLIMPORT bool check_function_bodies;
 extern int	CommitDelay;
 extern int	CommitSiblings;
-extern int	DebugSharedBuffers;
 extern char *default_tablespace;
 
 static const char *assign_log_destination(const char *value,
@@ -1230,15 +1229,6 @@ static struct config_int ConfigureNamesInt[] =
 		-1, -1, INT_MAX / 1000, NULL, NULL
 	},
 
-	{
-		{"debug_shared_buffers", PGC_POSTMASTER, STATS_MONITORING,
-			gettext_noop("Interval to report shared buffer status in seconds"),
-			NULL
-		},
-		&DebugSharedBuffers,
-		0, 0, 600, NULL, NULL
-	},
-
 	{
 		{"bgwriter_delay", PGC_SIGHUP, RESOURCES,
 			gettext_noop("Background writer sleep time between rounds in milliseconds"),
@@ -1249,21 +1239,21 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
-		{"bgwriter_percent", PGC_SIGHUP, RESOURCES,
-			gettext_noop("Background writer percentage of dirty buffers to flush per round"),
+		{"bgwriter_lru_maxpages", PGC_SIGHUP, RESOURCES,
+			gettext_noop("Background writer maximum number of all pages to flush per round"),
 			NULL
 		},
-		&BgWriterPercent,
-		1, 0, 100, NULL, NULL
+		&bgwriter_lru_maxpages,
+		5, 0, 1000, NULL, NULL
 	},
 
 	{
-		{"bgwriter_maxpages", PGC_SIGHUP, RESOURCES,
-			gettext_noop("Background writer maximum number of pages to flush per round"),
+		{"bgwriter_all_maxpages", PGC_SIGHUP, RESOURCES,
+			gettext_noop("Background writer maximum number of LRU pages to flush per round"),
 			NULL
 		},
-		&BgWriterMaxPages,
-		100, 0, 1000, NULL, NULL
+		&bgwriter_all_maxpages,
+		5, 0, 1000, NULL, NULL
 	},
 
 	{
@@ -1394,6 +1384,24 @@ static struct config_real ConfigureNamesReal[] =
 		MAX_GEQO_SELECTION_BIAS, NULL, NULL
 	},
 
+	{
+		{"bgwriter_lru_percent", PGC_SIGHUP, RESOURCES,
+			gettext_noop("Background writer percentage of LRU buffers to flush per round"),
+			NULL
+		},
+		&bgwriter_lru_percent,
+		1.0, 0.0, 100.0, NULL, NULL
+	},
+
+	{
+		{"bgwriter_all_percent", PGC_SIGHUP, RESOURCES,
+			gettext_noop("Background writer percentage of all buffers to flush per round"),
+			NULL
+		},
+		&bgwriter_all_percent,
+		0.333, 0.0, 100.0, NULL, NULL
+	},
+
 	{
 		{"seed", PGC_USERSET, UNGROUPED,
 			gettext_noop("Sets the seed for random-number generation."),
src/backend/utils/misc/postgresql.conf.sample
@@ -99,8 +99,10 @@
 # - Background writer -
 
 #bgwriter_delay = 200		# 10-10000 milliseconds between rounds
-#bgwriter_percent = 1		# 0-100% of dirty buffers in each round
-#bgwriter_maxpages = 100	# 0-1000 buffers max per round
+#bgwriter_lru_percent = 1.0	# 0-100% of LRU buffers scanned in each round
+#bgwriter_lru_maxpages = 5	# 0-1000 buffers max written per round
+#bgwriter_all_percent = 0.333	# 0-100% of all buffers scanned in each round
+#bgwriter_all_maxpages = 5	# 0-1000 buffers max written per round
 
 
 #---------------------------------------------------------------------------
src/backend/utils/resowner/resowner.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/resowner/resowner.c,v 1.9 2004/12/31 22:02:50 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/resowner/resowner.c,v 1.10 2005/03/04 20:21:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
 */
@@ -200,12 +200,7 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
 			 * that would indicate failure to clean up the executor correctly ---
 			 * so issue warnings. In the abort case, just clean up quietly.
 			 *
-			 * XXX this is fairly inefficient due to multiple BufMgrLock
-			 * grabs if there are lots of buffers to be released, but we
-			 * don't expect many (indeed none in the success case) so it's
-			 * probably not worth optimizing.
-			 *
-			 * We are however careful to release back-to-front, so as to
+			 * We are careful to do the releasing back-to-front, so as to
 			 * avoid O(N^2) behavior in ResourceOwnerForgetBuffer().
 			 */
 			while (owner->nbuffers > 0)