mirror of
https://github.com/postgres/postgres.git
synced 2025-08-28 18:48:04 +03:00
This was not changed in HEAD, but will be done later as part of a pgindent run. Future pgindent runs will also do this. Report by Tom Lane Backpatch through all supported branches, but not HEAD
375 lines
10 KiB
C
375 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* fsmpage.c
|
|
* routines to search and manipulate one FSM page.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/storage/freespace/fsmpage.c
|
|
*
|
|
* NOTES:
|
|
*
|
|
* The public functions in this file form an API that hides the internal
|
|
* structure of a FSM page. This allows freespace.c to treat each FSM page
|
|
* as a black box with SlotsPerPage "slots". fsm_set_avail() and
|
|
* fsm_get_avail() let you get/set the value of a slot, and
|
|
* fsm_search_avail() lets you search for a slot with value >= X.
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "storage/bufmgr.h"
|
|
#include "storage/fsm_internals.h"
|
|
|
|
/* Macros to navigate the tree within a page. Root has index zero. */
|
|
#define leftchild(x) (2 * (x) + 1)
|
|
#define rightchild(x) (2 * (x) + 2)
|
|
#define parentof(x) (((x) - 1) / 2)
|
|
|
|
/*
|
|
* Find right neighbor of x, wrapping around within the level
|
|
*/
|
|
static int
|
|
rightneighbor(int x)
|
|
{
|
|
/*
|
|
* Move right. This might wrap around, stepping to the leftmost node at
|
|
* the next level.
|
|
*/
|
|
x++;
|
|
|
|
/*
|
|
* Check if we stepped to the leftmost node at next level, and correct if
|
|
* so. The leftmost nodes at each level are numbered x = 2^level - 1, so
|
|
* check if (x + 1) is a power of two, using a standard
|
|
* twos-complement-arithmetic trick.
|
|
*/
|
|
if (((x + 1) & x) == 0)
|
|
x = parentof(x);
|
|
|
|
return x;
|
|
}
|
|
|
|
/*
|
|
* Sets the value of a slot on page. Returns true if the page was modified.
|
|
*
|
|
* The caller must hold an exclusive lock on the page.
|
|
*/
|
|
bool
|
|
fsm_set_avail(Page page, int slot, uint8 value)
|
|
{
|
|
int nodeno = NonLeafNodesPerPage + slot;
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
uint8 oldvalue;
|
|
|
|
Assert(slot < LeafNodesPerPage);
|
|
|
|
oldvalue = fsmpage->fp_nodes[nodeno];
|
|
|
|
/* If the value hasn't changed, we don't need to do anything */
|
|
if (oldvalue == value && value <= fsmpage->fp_nodes[0])
|
|
return false;
|
|
|
|
fsmpage->fp_nodes[nodeno] = value;
|
|
|
|
/*
|
|
* Propagate up, until we hit the root or a node that doesn't need to be
|
|
* updated.
|
|
*/
|
|
do
|
|
{
|
|
uint8 newvalue = 0;
|
|
int lchild;
|
|
int rchild;
|
|
|
|
nodeno = parentof(nodeno);
|
|
lchild = leftchild(nodeno);
|
|
rchild = lchild + 1;
|
|
|
|
newvalue = fsmpage->fp_nodes[lchild];
|
|
if (rchild < NodesPerPage)
|
|
newvalue = Max(newvalue,
|
|
fsmpage->fp_nodes[rchild]);
|
|
|
|
oldvalue = fsmpage->fp_nodes[nodeno];
|
|
if (oldvalue == newvalue)
|
|
break;
|
|
|
|
fsmpage->fp_nodes[nodeno] = newvalue;
|
|
} while (nodeno > 0);
|
|
|
|
/*
|
|
* sanity check: if the new value is (still) higher than the value at the
|
|
* top, the tree is corrupt. If so, rebuild.
|
|
*/
|
|
if (value > fsmpage->fp_nodes[0])
|
|
fsm_rebuild_page(page);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Returns the value of given slot on page.
|
|
*
|
|
* Since this is just a read-only access of a single byte, the page doesn't
|
|
* need to be locked.
|
|
*/
|
|
uint8
|
|
fsm_get_avail(Page page, int slot)
|
|
{
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
|
|
Assert(slot < LeafNodesPerPage);
|
|
|
|
return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
|
|
}
|
|
|
|
/*
|
|
* Returns the value at the root of a page.
|
|
*
|
|
* Since this is just a read-only access of a single byte, the page doesn't
|
|
* need to be locked.
|
|
*/
|
|
uint8
|
|
fsm_get_max_avail(Page page)
|
|
{
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
|
|
return fsmpage->fp_nodes[0];
|
|
}
|
|
|
|
/*
|
|
* Searches for a slot with category at least minvalue.
|
|
* Returns slot number, or -1 if none found.
|
|
*
|
|
* The caller must hold at least a shared lock on the page, and this
|
|
* function can unlock and lock the page again in exclusive mode if it
|
|
* needs to be updated. exclusive_lock_held should be set to true if the
|
|
* caller is already holding an exclusive lock, to avoid extra work.
|
|
*
|
|
* If advancenext is false, fp_next_slot is set to point to the returned
|
|
* slot, and if it's true, to the slot after the returned slot.
|
|
*/
|
|
int
|
|
fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
|
|
bool exclusive_lock_held)
|
|
{
|
|
Page page = BufferGetPage(buf);
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
int nodeno;
|
|
int target;
|
|
uint16 slot;
|
|
|
|
restart:
|
|
|
|
/*
|
|
* Check the root first, and exit quickly if there's no leaf with enough
|
|
* free space
|
|
*/
|
|
if (fsmpage->fp_nodes[0] < minvalue)
|
|
return -1;
|
|
|
|
/*
|
|
* Start search using fp_next_slot. It's just a hint, so check that it's
|
|
* sane. (This also handles wrapping around when the prior call returned
|
|
* the last slot on the page.)
|
|
*/
|
|
target = fsmpage->fp_next_slot;
|
|
if (target < 0 || target >= LeafNodesPerPage)
|
|
target = 0;
|
|
target += NonLeafNodesPerPage;
|
|
|
|
/*----------
|
|
* Start the search from the target slot. At every step, move one
|
|
* node to the right, then climb up to the parent. Stop when we reach
|
|
* a node with enough free space (as we must, since the root has enough
|
|
* space).
|
|
*
|
|
* The idea is to gradually expand our "search triangle", that is, all
|
|
* nodes covered by the current node, and to be sure we search to the
|
|
* right from the start point. At the first step, only the target slot
|
|
* is examined. When we move up from a left child to its parent, we are
|
|
* adding the right-hand subtree of that parent to the search triangle.
|
|
* When we move right then up from a right child, we are dropping the
|
|
* current search triangle (which we know doesn't contain any suitable
|
|
* page) and instead looking at the next-larger-size triangle to its
|
|
* right. So we never look left from our original start point, and at
|
|
* each step the size of the search triangle doubles, ensuring it takes
|
|
* only log2(N) work to search N pages.
|
|
*
|
|
* The "move right" operation will wrap around if it hits the right edge
|
|
* of the tree, so the behavior is still good if we start near the right.
|
|
* Note also that the move-and-climb behavior ensures that we can't end
|
|
* up on one of the missing nodes at the right of the leaf level.
|
|
*
|
|
* For example, consider this tree:
|
|
*
|
|
* 7
|
|
* 7 6
|
|
* 5 7 6 5
|
|
* 4 5 5 7 2 6 5 2
|
|
* T
|
|
*
|
|
* Assume that the target node is the node indicated by the letter T,
|
|
* and we're searching for a node with value of 6 or higher. The search
|
|
* begins at T. At the first iteration, we move to the right, then to the
|
|
* parent, arriving at the rightmost 5. At the second iteration, we move
|
|
* to the right, wrapping around, then climb up, arriving at the 7 on the
|
|
* third level. 7 satisfies our search, so we descend down to the bottom,
|
|
* following the path of sevens. This is in fact the first suitable page
|
|
* to the right of (allowing for wraparound) our start point.
|
|
*----------
|
|
*/
|
|
nodeno = target;
|
|
while (nodeno > 0)
|
|
{
|
|
if (fsmpage->fp_nodes[nodeno] >= minvalue)
|
|
break;
|
|
|
|
/*
|
|
* Move to the right, wrapping around on same level if necessary, then
|
|
* climb up.
|
|
*/
|
|
nodeno = parentof(rightneighbor(nodeno));
|
|
}
|
|
|
|
/*
|
|
* We're now at a node with enough free space, somewhere in the middle of
|
|
* the tree. Descend to the bottom, following a path with enough free
|
|
* space, preferring to move left if there's a choice.
|
|
*/
|
|
while (nodeno < NonLeafNodesPerPage)
|
|
{
|
|
int childnodeno = leftchild(nodeno);
|
|
|
|
if (childnodeno < NodesPerPage &&
|
|
fsmpage->fp_nodes[childnodeno] >= minvalue)
|
|
{
|
|
nodeno = childnodeno;
|
|
continue;
|
|
}
|
|
childnodeno++; /* point to right child */
|
|
if (childnodeno < NodesPerPage &&
|
|
fsmpage->fp_nodes[childnodeno] >= minvalue)
|
|
{
|
|
nodeno = childnodeno;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Oops. The parent node promised that either left or right child
|
|
* has enough space, but neither actually did. This can happen in
|
|
* case of a "torn page", IOW if we crashed earlier while writing
|
|
* the page to disk, and only part of the page made it to disk.
|
|
*
|
|
* Fix the corruption and restart.
|
|
*/
|
|
RelFileNode rnode;
|
|
ForkNumber forknum;
|
|
BlockNumber blknum;
|
|
|
|
BufferGetTag(buf, &rnode, &forknum, &blknum);
|
|
elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
|
|
blknum, rnode.spcNode, rnode.dbNode, rnode.relNode);
|
|
|
|
/* make sure we hold an exclusive lock */
|
|
if (!exclusive_lock_held)
|
|
{
|
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
|
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
|
exclusive_lock_held = true;
|
|
}
|
|
fsm_rebuild_page(page);
|
|
MarkBufferDirty(buf);
|
|
goto restart;
|
|
}
|
|
}
|
|
|
|
/* We're now at the bottom level, at a node with enough space. */
|
|
slot = nodeno - NonLeafNodesPerPage;
|
|
|
|
/*
|
|
* Update the next-target pointer. Note that we do this even if we're only
|
|
* holding a shared lock, on the grounds that it's better to use a shared
|
|
* lock and get a garbled next pointer every now and then, than take the
|
|
* concurrency hit of an exclusive lock.
|
|
*
|
|
* Wrap-around is handled at the beginning of this function.
|
|
*/
|
|
fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
|
|
|
|
return slot;
|
|
}
|
|
|
|
/*
|
|
* Sets the available space to zero for all slots numbered >= nslots.
|
|
* Returns true if the page was modified.
|
|
*/
|
|
bool
|
|
fsm_truncate_avail(Page page, int nslots)
|
|
{
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
uint8 *ptr;
|
|
bool changed = false;
|
|
|
|
Assert(nslots >= 0 && nslots < LeafNodesPerPage);
|
|
|
|
/* Clear all truncated leaf nodes */
|
|
ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
|
|
for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
|
|
{
|
|
if (*ptr != 0)
|
|
changed = true;
|
|
*ptr = 0;
|
|
}
|
|
|
|
/* Fix upper nodes. */
|
|
if (changed)
|
|
fsm_rebuild_page(page);
|
|
|
|
return changed;
|
|
}
|
|
|
|
/*
|
|
* Reconstructs the upper levels of a page. Returns true if the page
|
|
* was modified.
|
|
*/
|
|
bool
|
|
fsm_rebuild_page(Page page)
|
|
{
|
|
FSMPage fsmpage = (FSMPage) PageGetContents(page);
|
|
bool changed = false;
|
|
int nodeno;
|
|
|
|
/*
|
|
* Start from the lowest non-leaf level, at last node, working our way
|
|
* backwards, through all non-leaf nodes at all levels, up to the root.
|
|
*/
|
|
for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
|
|
{
|
|
int lchild = leftchild(nodeno);
|
|
int rchild = lchild + 1;
|
|
uint8 newvalue = 0;
|
|
|
|
/* The first few nodes we examine might have zero or one child. */
|
|
if (lchild < NodesPerPage)
|
|
newvalue = fsmpage->fp_nodes[lchild];
|
|
|
|
if (rchild < NodesPerPage)
|
|
newvalue = Max(newvalue,
|
|
fsmpage->fp_nodes[rchild]);
|
|
|
|
if (fsmpage->fp_nodes[nodeno] != newvalue)
|
|
{
|
|
fsmpage->fp_nodes[nodeno] = newvalue;
|
|
changed = true;
|
|
}
|
|
}
|
|
|
|
return changed;
|
|
}
|