1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Rewrite the FSM. Instead of relying on a fixed-size shared memory segment, the

free space information is stored in a dedicated FSM relation fork, with each
relation (except for hash indexes; they don't use FSM).

This eliminates the max_fsm_relations and max_fsm_pages GUC options; remove any
trace of them from the backend, initdb, and documentation.

Rewrite contrib/pg_freespacemap to match the new FSM implementation. Also
introduce a new variant of the get_raw_page(regclass, int4, int4) function in
contrib/pageinspect that let's you to return pages from any relation fork, and
a new fsm_page_contents() function to inspect the new FSM pages.
This commit is contained in:
Heikki Linnakangas
2008-09-30 10:52:14 +00:00
parent 2dbc0ca937
commit 15c121b3ed
53 changed files with 1844 additions and 2720 deletions

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.14 2008/07/11 21:06:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.15 2008/09/30 10:52:10 heikki Exp $
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,7 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
@@ -283,6 +284,9 @@ ginbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
initGinState(&buildstate.ginstate, index);
/* initialize the root page */

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.16 2008/07/11 21:06:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.17 2008/09/30 10:52:10 heikki Exp $
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,7 @@
#include "catalog/pg_type.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
void
@@ -151,7 +152,7 @@ GinNewBuffer(Relation index)
/* First, try to get a page from FSM */
for (;;)
{
BlockNumber blkno = GetFreeIndexPage(&index->rd_node);
BlockNumber blkno = GetFreeIndexPage(index);
if (blkno == InvalidBlockNumber)
break;

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.21 2008/07/11 21:06:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.22 2008/09/30 10:52:10 heikki Exp $
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,7 @@
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
typedef struct
@@ -678,10 +679,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
bool needLock;
BlockNumber npages,
blkno;
BlockNumber totFreePages,
nFreePages,
*freePages,
maxFreePages;
BlockNumber totFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
@@ -711,12 +709,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
if (needLock)
UnlockRelationForExtension(index, ExclusiveLock);
maxFreePages = npages;
if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages;
totFreePages = nFreePages = 0;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
totFreePages = 0;
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
{
@@ -731,8 +724,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
if (GinPageIsDeleted(page))
{
if (nFreePages < maxFreePages)
freePages[nFreePages++] = blkno;
RecordFreeIndexPage(index, blkno);
totFreePages++;
}
else
@@ -742,25 +734,16 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
}
lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0)
if (info->vacuum_full && lastBlock > lastFilledBlock)
{
/* try to truncate index */
int i;
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
totFreePages = nFreePages = i;
break;
}
if (lastBlock > lastFilledBlock)
RelationTruncate(index, lastFilledBlock + 1);
FreeSpaceMapTruncateRel(index, lastFilledBlock + 1);
RelationTruncate(index, lastFilledBlock + 1);
stats->pages_removed = lastBlock - lastFilledBlock;
totFreePages = totFreePages - stats->pages_removed;
}
RecordIndexFreeSpace(&index->rd_node, totFreePages, nFreePages, freePages);
stats->pages_free = totFreePages;
if (needLock)

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.151 2008/06/12 09:12:29 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.152 2008/09/30 10:52:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,7 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
const XLogRecPtr XLogRecPtrForTemp = {1, 1};
@@ -102,6 +103,9 @@ gistbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
/* no locking is needed */
initGISTstate(&buildstate.giststate, index);

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.30 2008/07/13 20:45:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.31 2008/09/30 10:52:10 heikki Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -16,6 +16,7 @@
#include "access/gist_private.h"
#include "access/reloptions.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
@@ -617,7 +618,7 @@ gistNewBuffer(Relation r)
/* First, try to get a page from FSM */
for (;;)
{
BlockNumber blkno = GetFreeIndexPage(&r->rd_node);
BlockNumber blkno = GetFreeIndexPage(r);
if (blkno == InvalidBlockNumber)
break; /* nothing left in FSM */

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.36 2008/06/12 09:12:30 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.37 2008/09/30 10:52:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,7 @@
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
@@ -518,10 +519,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
Relation rel = info->index;
BlockNumber npages,
blkno;
BlockNumber totFreePages,
nFreePages,
*freePages,
maxFreePages;
BlockNumber totFreePages;
BlockNumber lastBlock = GIST_ROOT_BLKNO,
lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
@@ -589,13 +587,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock);
maxFreePages = npages;
if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages;
totFreePages = nFreePages = 0;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
totFreePages = 0;
for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
{
Buffer buffer;
@@ -609,9 +601,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
if (PageIsNew(page) || GistPageIsDeleted(page))
{
if (nFreePages < maxFreePages)
freePages[nFreePages++] = blkno;
totFreePages++;
RecordFreeIndexPage(rel, blkno);
}
else
lastFilledBlock = blkno;
@@ -619,25 +610,15 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
}
lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0)
if (info->vacuum_full && lastFilledBlock < lastBlock)
{ /* try to truncate index */
int i;
FreeSpaceMapTruncateRel(rel, lastFilledBlock + 1);
RelationTruncate(rel, lastFilledBlock + 1);
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
totFreePages = nFreePages = i;
break;
}
if (lastBlock > lastFilledBlock)
RelationTruncate(rel, lastFilledBlock + 1);
stats->std.pages_removed = lastBlock - lastFilledBlock;
totFreePages = totFreePages - stats->std.pages_removed;
}
RecordIndexFreeSpace(&rel->rd_node, totFreePages, nFreePages, freePages);
pfree(freePages);
/* return statistics */
stats->std.pages_free = totFreePages;
if (needLock)

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.263 2008/09/11 14:01:09 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.264 2008/09/30 10:52:10 heikki Exp $
*
*
* INTERFACE ROUTINES
@@ -4721,6 +4721,9 @@ heap_sync(Relation rel)
/* FlushRelationBuffers will have opened rd_smgr */
smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM);
/* sync FSM as well */
smgrimmedsync(rel->rd_smgr, FSM_FORKNUM);
/* toast heap, if any */
if (OidIsValid(rel->rd_rel->reltoastrelid))
{
@@ -4729,6 +4732,7 @@ heap_sync(Relation rel)
toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
FlushRelationBuffers(toastrel);
smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM);
smgrimmedsync(toastrel->rd_smgr, FSM_FORKNUM);
heap_close(toastrel, AccessShareLock);
}
}

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.72 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -163,8 +163,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* We have no cached target page, so ask the FSM for an initial
* target.
*/
targetBlock = GetPageWithFreeSpace(&relation->rd_node,
len + saveFreeSpace);
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
/*
* If the FSM knows nothing of the rel, try the last page before we
@@ -250,7 +249,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* Update FSM as to condition of this page, and ask for another page
* to try.
*/
targetBlock = RecordAndGetPageWithFreeSpace(&relation->rd_node,
targetBlock = RecordAndGetPageWithFreeSpace(relation,
targetBlock,
pageFreeSpace,
len + saveFreeSpace);

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.110 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.111 2008/09/30 10:52:10 heikki Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -27,6 +27,7 @@
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/inval.h"
#include "utils/snapmgr.h"
@@ -501,7 +502,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
*/
for (;;)
{
blkno = GetFreeIndexPage(&rel->rd_node);
blkno = GetFreeIndexPage(rel);
if (blkno == InvalidBlockNumber)
break;
buf = ReadBuffer(rel, blkno);

View File

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.161 2008/06/19 00:46:03 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.162 2008/09/30 10:52:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,7 @@
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
@@ -56,9 +57,7 @@ typedef struct
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
BlockNumber *freePages;
int nFreePages; /* number of entries in freePages[] */
int maxFreePages; /* allocated size of freePages[] */
BlockNumber lastUsedPage;
BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext;
} BTVacState;
@@ -110,6 +109,9 @@ btbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);
/*
@@ -623,9 +625,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
vstate.callback = callback;
vstate.callback_state = callback_state;
vstate.cycleid = cycleid;
vstate.freePages = NULL; /* temporarily */
vstate.nFreePages = 0;
vstate.maxFreePages = 0;
vstate.lastUsedPage = BTREE_METAPAGE;
vstate.totFreePages = 0;
/* Create a temporary memory context to run _bt_pagedel in */
@@ -670,17 +670,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock);
/* Allocate freePages after we read num_pages the first time */
if (vstate.freePages == NULL)
{
/* No point in remembering more than MaxFSMPages pages */
vstate.maxFreePages = MaxFSMPages;
if ((BlockNumber) vstate.maxFreePages > num_pages)
vstate.maxFreePages = (int) num_pages;
vstate.freePages = (BlockNumber *)
palloc(vstate.maxFreePages * sizeof(BlockNumber));
}
/* Quit if we've scanned the whole relation */
if (blkno >= num_pages)
break;
@@ -697,42 +686,22 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
* acquiring exclusive lock on the index and then rechecking all the
* pages; doesn't seem worth it.
*/
if (info->vacuum_full && vstate.nFreePages > 0)
if (info->vacuum_full && vstate.lastUsedPage < num_pages - 1)
{
BlockNumber new_pages = num_pages;
BlockNumber new_pages = vstate.lastUsedPage + 1;
while (vstate.nFreePages > 0 &&
vstate.freePages[vstate.nFreePages - 1] == new_pages - 1)
{
new_pages--;
stats->pages_deleted--;
vstate.nFreePages--;
vstate.totFreePages = vstate.nFreePages; /* can't be more */
}
if (new_pages != num_pages)
{
/*
* Okay to truncate.
*/
RelationTruncate(rel, new_pages);
/*
* Okay to truncate.
*/
FreeSpaceMapTruncateRel(rel, new_pages);
RelationTruncate(rel, new_pages);
/* update statistics */
stats->pages_removed += num_pages - new_pages;
num_pages = new_pages;
}
/* update statistics */
stats->pages_removed += num_pages - new_pages;
vstate.totFreePages -= (num_pages - new_pages);
num_pages = new_pages;
}
/*
* Update the shared Free Space Map with the info we now have about free
* pages in the index, discarding any old info the map may have. We do not
* need to sort the page numbers; they're in order already.
*/
RecordIndexFreeSpace(&rel->rd_node, vstate.totFreePages,
vstate.nFreePages, vstate.freePages);
pfree(vstate.freePages);
MemoryContextDelete(vstate.pagedelcontext);
/* update statistics */
@@ -788,8 +757,7 @@ restart:
/*
* If we are recursing, the only case we want to do anything with is a
* live leaf page having the current vacuum cycle ID. Any other state
* implies we already saw the page (eg, deleted it as being empty). In
* particular, we don't want to risk adding it to freePages twice.
* implies we already saw the page (eg, deleted it as being empty).
*/
if (blkno != orig_blkno)
{
@@ -803,12 +771,15 @@ restart:
}
}
/* If the page is in use, update lastUsedPage */
if (!_bt_page_recyclable(page) && vstate->lastUsedPage < blkno)
vstate->lastUsedPage = blkno;
/* Page is valid, see what to do with it */
if (_bt_page_recyclable(page))
{
/* Okay to recycle this page */
if (vstate->nFreePages < vstate->maxFreePages)
vstate->freePages[vstate->nFreePages++] = blkno;
RecordFreeIndexPage(rel, blkno);
vstate->totFreePages++;
stats->pages_deleted++;
}
@@ -944,8 +915,7 @@ restart:
*/
if (ndel && info->vacuum_full)
{
if (vstate->nFreePages < vstate->maxFreePages)
vstate->freePages[vstate->nFreePages++] = blkno;
RecordFreeIndexPage(rel, blkno);
vstate->totFreePages++;
}

View File

@@ -52,12 +52,14 @@
* we log the completed index pages to WAL if and only if WAL archiving is
* active.
*
* This code isn't concerned about the FSM at all. The caller is responsible
* for initializing that.
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.117 2008/08/11 11:05:10 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.118 2008/09/30 10:52:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/

View File

@@ -3,7 +3,7 @@
*
* Resource managers definition
*
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.25 2006/11/05 22:42:07 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.26 2008/09/30 10:52:11 heikki Exp $
*/
#include "postgres.h"
@@ -19,6 +19,7 @@
#include "commands/dbcommands.h"
#include "commands/sequence.h"
#include "commands/tablespace.h"
#include "storage/freespace.h"
#include "storage/smgr.h"
@@ -30,7 +31,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL},
{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL},
{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL},
{"Reserved 7", NULL, NULL, NULL, NULL, NULL},
{"FreeSpaceMap", fsm_redo, fsm_desc, NULL, NULL, NULL},
{"Reserved 8", NULL, NULL, NULL, NULL, NULL},
{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL},
{"Heap", heap_redo, heap_desc, NULL, NULL, NULL},

View File

@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.59 2008/09/30 10:52:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -359,6 +359,7 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
rel->rd_targblock = InvalidBlockNumber;
rel->rd_fsm_nblocks_cache = InvalidBlockNumber;
rel->rd_smgr = NULL;
return rel;