1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-13 07:41:39 +03:00

Accelerate end-of-transaction dropping of relations

When relations are dropped, at end of transaction we need to remove the
files and clean the buffer pool of buffers containing pages of those
relations.  Previously we would scan the buffer pool once per relation
to clean up buffers.  When there are many relations to drop, the
repeated scans make this process slow; so we now instead pass a list of
relations to drop and scan the pool once, checking each buffer against
the passed list.  When the number of relations is larger than a
threshold (which as of this patch is being set to 20 relations) we sort
the array before starting, and bsearch the array; when it's smaller, we
simply scan the array linearly each time, because that's faster.  The
exact optimal threshold value depends on many factors, but the
difference is not likely to be significant enough to justify making it
user-settable.

This has been measured to be a significant win (a 15x win when dropping
100,000 relations; an extreme case, but reportedly a real one).

Author: Tomas Vondra, some tweaks by me
Reviewed by: Robert Haas, Shigeru Hanada, Andres Freund, Álvaro Herrera
This commit is contained in:
Alvaro Herrera
2013-01-17 15:55:10 -03:00
parent 0b6329130e
commit 279628a0a7
5 changed files with 206 additions and 14 deletions

View File

@ -62,6 +62,7 @@
#define BUF_WRITTEN 0x01
#define BUF_REUSABLE 0x02
#define DROP_RELS_BSEARCH_THRESHOLD 20
/* GUC variables */
bool zero_damaged_pages = false;
@ -107,6 +108,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
static void AtProcExit_Buffers(int code, Datum arg);
static int rnode_comparator(const void *p1, const void *p2);
/*
@ -2086,43 +2088,103 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
}
/* ---------------------------------------------------------------------
* DropRelFileNodeAllBuffers
* DropRelFileNodesAllBuffers
*
* This function removes from the buffer pool all the pages of all
* forks of the specified relation. It's equivalent to calling
* DropRelFileNodeBuffers once per fork with firstDelBlock = 0.
* forks of the specified relations. It's equivalent to calling
* DropRelFileNodeBuffers once per fork per relation with
* firstDelBlock = 0.
* --------------------------------------------------------------------
*/
void
DropRelFileNodeAllBuffers(RelFileNodeBackend rnode)
DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
{
int i;
int i,
n = 0;
RelFileNode *nodes;
bool use_bsearch;
if (nnodes == 0)
return;
nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
/* If it's a local relation, it's localbuf.c's problem. */
if (RelFileNodeBackendIsTemp(rnode))
for (i = 0; i < nnodes; i++)
{
if (rnode.backend == MyBackendId)
DropRelFileNodeAllLocalBuffers(rnode.node);
if (RelFileNodeBackendIsTemp(rnodes[i]))
{
if (rnodes[i].backend == MyBackendId)
DropRelFileNodeAllLocalBuffers(rnodes[i].node);
}
else
nodes[n++] = rnodes[i].node;
}
/*
* If there are no non-local relations, then we're done. Release the memory
* and return.
*/
if (n == 0)
{
pfree(nodes);
return;
}
/*
* For low number of relations to drop just use a simple walk through, to
* save the bsearch overhead. The threshold to use is rather a guess than a
* exactly determined value, as it depends on many factors (CPU and RAM
* speeds, amount of shared buffers etc.).
*/
use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
/* sort the list of rnodes if necessary */
if (use_bsearch)
pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
for (i = 0; i < NBuffers; i++)
{
RelFileNode *rnode = NULL;
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
* and saves some cycles.
*/
if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
if (!use_bsearch)
{
int j;
for (j = 0; j < n; j++)
{
if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
{
rnode = &nodes[j];
break;
}
}
}
else
{
rnode = bsearch((const void *) &(bufHdr->tag.rnode),
nodes, n, sizeof(RelFileNode),
rnode_comparator);
}
/* buffer doesn't belong to any of the given relfilenodes; skip it */
if (rnode == NULL)
continue;
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
InvalidateBuffer(bufHdr); /* releases spinlock */
else
UnlockBufHdr(bufHdr);
}
pfree(nodes);
}
/* ---------------------------------------------------------------------
@ -2953,3 +3015,30 @@ local_buffer_write_error_callback(void *arg)
pfree(path);
}
}
/*
* RelFileNode qsort/bsearch comparator; see RelFileNodeEquals.
*/
static int
rnode_comparator(const void *p1, const void *p2)
{
RelFileNode n1 = *(RelFileNode *) p1;
RelFileNode n2 = *(RelFileNode *) p2;
if (n1.relNode < n2.relNode)
return -1;
else if (n1.relNode > n2.relNode)
return 1;
if (n1.dbNode < n2.dbNode)
return -1;
else if (n1.dbNode > n2.dbNode)
return 1;
if (n1.spcNode < n2.spcNode)
return -1;
else if (n1.spcNode > n2.spcNode)
return 1;
else
return 0;
}