Revert "Avoid creation of the free space map for small heap relations."
This reverts commit ac88d2962a.

src/backend/storage/freespace/README

@@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page
 exists and the relation must be extended by one page. As of PostgreSQL 8.4
 each relation has its own, extensible free space map stored in a separate
 "fork" of its relation. This eliminates the disadvantages of the former
-fixed-size FSM. There are two exceptions:
-
-1. Hash indexes never have a FSM.
-2. For very small tables, a 3-page relation fork would be relatively large
-and wasteful, so to save space we refrain from creating the FSM if the
-heap has HEAP_FSM_CREATION_THRESHOLD pages or fewer.
-
-To locate free space in the latter case, we simply try pages directly without
-knowing ahead of time how much free space they have. To maintain good
-performance, we create a local in-memory map of pages to try, and only mark
-every other page as available. For example, in a 3-page heap, the local map
-would look like:
-
-ANAN
-0123
-
-Pages 0 and 2 are marked "available", and page 1 as "not available".
-Page 3 is beyond the end of the relation, so is likewise marked "not
-available". First we try page 2, and if that doesn't have sufficient free
-space we try page 0 before giving up and extending the relation. There may
-be some wasted free space on block 1, but if the relation extends to 4 pages:
-
-NANA
-0123
-
-We not only have the new page 3 at our disposal, we can now check page 1
-for free space as well.
-
-Once the FSM is created for a heap we don't remove it even if somebody deletes
-all the rows from the corresponding relation. We don't think it is a useful
-optimization as it is quite likely that relation will again grow to the same
-size.
-
-FSM data structure
-------------------
+fixed-size FSM.
 
 It is important to keep the map small so that it can be searched rapidly.
 Therefore, we don't attempt to record the exact free space on a page.
@@ -226,3 +192,5 @@ TODO
 ----
 
 - fastroot to avoid traversing upper nodes with just 1 child
+- use a different system for tables that fit into one FSM page, with a
+mechanism to switch to the real thing as it grows.
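
The alternating-block scheme described in the reverted README text above can be traced with a short standalone program. The sketch below is illustrative only and is not part of this commit or of PostgreSQL: local_map_set(), local_map_search() and THRESHOLD are invented stand-ins for the fsm_local_set()/fsm_local_search() routines and HEAP_FSM_CREATION_THRESHOLD that the revert removes further down.

#include <stdio.h>
#include <string.h>

#define THRESHOLD 4					/* stand-in for HEAP_FSM_CREATION_THRESHOLD */

static char map[THRESHOLD];			/* 'A' = available to try, 'N' = not available */

/* Mark every other block, working backwards from the last block of the heap. */
static void
local_map_set(int nblocks)
{
	memset(map, 'N', sizeof(map));
	for (int blkno = nblocks - 1; blkno >= 0; blkno -= 2)
		map[blkno] = 'A';
}

/* Return the highest-numbered block still marked available, or -1 if none remain. */
static int
local_map_search(void)
{
	for (int blkno = THRESHOLD - 1; blkno >= 0; blkno--)
		if (map[blkno] == 'A')
			return blkno;
	return -1;
}

int
main(void)
{
	local_map_set(3);				/* 3-page heap: blocks 2 and 0 are candidates */
	printf("3 pages: %.4s, first try block %d\n", map, local_map_search());

	local_map_set(4);				/* heap grew to 4 pages: blocks 3 and 1 are candidates */
	printf("4 pages: %.4s, first try block %d\n", map, local_map_search());
	return 0;
}

This prints the ANAN and NANA patterns and the first block tried (2 for a 3-page heap, 3 for a 4-page heap), matching the worked example in the removed README text.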
src/backend/storage/freespace/freespace.c

@@ -76,14 +76,6 @@
 #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
 #define FSM_BOTTOM_LEVEL 0
 
-/* Status codes for the local map. */
-
-/* Either already tried, or beyond the end of the relation */
-#define FSM_LOCAL_NOT_AVAIL 0x00
-
-/* Available to try */
-#define FSM_LOCAL_AVAIL 0x01
-
 /*
  * The internal FSM routines work on a logical addressing scheme. Each
  * level of the tree can be thought of as a separately addressable file.
@@ -97,17 +89,6 @@ typedef struct
 /* Address of the root page. */
 static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
 
-/* Local map of block numbers for small heaps with no FSM. */
-typedef struct
-{
-	BlockNumber nblocks;
-	uint8 map[HEAP_FSM_CREATION_THRESHOLD];
-} FSMLocalMap;
-
-static FSMLocalMap fsm_local_map = {0, {FSM_LOCAL_NOT_AVAIL}};
-
-#define FSM_LOCAL_MAP_EXISTS (fsm_local_map.nblocks > 0)
-
 /* functions to navigate the tree */
 static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
 static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
@@ -126,14 +107,10 @@ static Size fsm_space_cat_to_avail(uint8 cat);
 /* workhorse functions for various operations */
 static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 							  uint8 newValue, uint8 minValue);
-static void fsm_local_set(Relation rel, BlockNumber cur_nblocks);
 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
-static BlockNumber fsm_local_search(void);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
 							 BlockNumber start, BlockNumber end,
 							 bool *eof);
-static bool fsm_allow_writes(Relation rel, BlockNumber heapblk,
-							 BlockNumber nblocks, BlockNumber *get_nblocks);
 
 
 /******** Public API ********/
@@ -150,46 +127,13 @@ static bool fsm_allow_writes(Relation rel, BlockNumber heapblk,
  * amount of free space available on that page and then try again (see
  * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
  * extend the relation.
- *
- * For very small heap relations that don't have a FSM, we try every other
- * page before extending the relation. To keep track of which pages have
- * been tried, initialize a local in-memory map of pages.
  */
 BlockNumber
-GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only)
+GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
 {
 	uint8		min_cat = fsm_space_needed_to_cat(spaceNeeded);
-	BlockNumber target_block,
-				nblocks;
 
-	/* First try the FSM, if it exists. */
-	target_block = fsm_search(rel, min_cat);
-
-	if (target_block == InvalidBlockNumber &&
-		(rel->rd_rel->relkind == RELKIND_RELATION ||
-		 rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
-		!check_fsm_only)
-	{
-		nblocks = RelationGetNumberOfBlocks(rel);
-
-		if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
-		{
-			/*
-			 * If the FSM knows nothing of the rel, try the last page before
-			 * we give up and extend. This avoids one-tuple-per-page syndrome
-			 * during bootstrapping or in a recently-started system.
-			 */
-			target_block = nblocks - 1;
-		}
-		else if (nblocks > 0)
-		{
-			/* Create or update local map and get first candidate block. */
-			fsm_local_set(rel, nblocks);
-			target_block = fsm_local_search();
-		}
-	}
-
-	return target_block;
+	return fsm_search(rel, min_cat);
 }
 
 /*
@@ -200,47 +144,16 @@ GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only)
  * also some effort to return a page close to the old page; if there's a
  * page with enough free space on the same FSM page where the old one page
  * is located, it is preferred.
- *
- * For very small heap relations that don't have a FSM, we update the local
- * map to indicate we have tried a page, and return the next page to try.
  */
 BlockNumber
 RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
 							  Size oldSpaceAvail, Size spaceNeeded)
 {
-	int			old_cat;
-	int			search_cat;
+	int			old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
+	int			search_cat = fsm_space_needed_to_cat(spaceNeeded);
 	FSMAddress	addr;
 	uint16		slot;
 	int			search_slot;
-	BlockNumber nblocks = InvalidBlockNumber;
-
-	/* First try the local map, if it exists. */
-	if (FSM_LOCAL_MAP_EXISTS)
-	{
-		Assert((rel->rd_rel->relkind == RELKIND_RELATION ||
-				rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
-			   fsm_local_map.map[oldPage] == FSM_LOCAL_AVAIL);
-
-		fsm_local_map.map[oldPage] = FSM_LOCAL_NOT_AVAIL;
-		return fsm_local_search();
-	}
-
-	if (!fsm_allow_writes(rel, oldPage, InvalidBlockNumber, &nblocks))
-	{
-		/*
-		 * If we have neither a local map nor a FSM, we probably just
-		 * tried the target block in the smgr relation entry and failed,
-		 * so we'll need to create the local map.
-		 */
-		fsm_local_set(rel, nblocks);
-		return fsm_local_search();
-	}
-
-	/* Normal FSM logic follows */
-
-	old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
-	search_cat = fsm_space_needed_to_cat(spaceNeeded);
 
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(oldPage, &slot);
@@ -263,41 +176,20 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
  * Note that if the new spaceAvail value is higher than the old value stored
  * in the FSM, the space might not become visible to searchers until the next
  * FreeSpaceMapVacuum call, which updates the upper level pages.
- *
- * Callers have no need for a local map.
  */
 void
-RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
-						Size spaceAvail, BlockNumber nblocks)
+RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 {
-	int			new_cat;
+	int			new_cat = fsm_space_avail_to_cat(spaceAvail);
 	FSMAddress	addr;
 	uint16		slot;
-	BlockNumber dummy;
-
-	if (!fsm_allow_writes(rel, heapBlk, nblocks, &dummy))
-		/* No FSM to update and no local map either */
-		return;
 
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(heapBlk, &slot);
 
-	new_cat = fsm_space_avail_to_cat(spaceAvail);
 	fsm_set_and_search(rel, addr, slot, new_cat, 0);
 }
 
-/*
- * Clear the local map. We must call this when we have found a block with
- * enough free space, when we extend the relation, or on transaction abort.
- */
-void
-FSMClearLocalMap(void)
-{
-	fsm_local_map.nblocks = 0;
-	memset(&fsm_local_map.map, FSM_LOCAL_NOT_AVAIL,
-		   sizeof(fsm_local_map.map));
-}
-
 /*
  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
  * WAL replay
@@ -312,30 +204,6 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 	BlockNumber blkno;
 	Buffer		buf;
 	Page		page;
-	bool		write_to_fsm;
 
-	/* This is meant to mirror the logic in fsm_allow_writes() */
-	if (heapBlk >= HEAP_FSM_CREATION_THRESHOLD)
-		write_to_fsm = true;
-	else
-	{
-		/* Open the relation at smgr level */
-		SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
-
-		if (smgrexists(smgr, FSM_FORKNUM))
-			write_to_fsm = true;
-		else
-		{
-			BlockNumber heap_nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
-			if (heap_nblocks > HEAP_FSM_CREATION_THRESHOLD)
-				write_to_fsm = true;
-			else
-				write_to_fsm = false;
-		}
-	}
-
-	if (!write_to_fsm)
-		return;
-
 	/* Get the location of the FSM byte representing the heap block */
 	addr = fsm_get_location(heapBlk, &slot);
@@ -1036,134 +904,3 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
 
 	return max_avail;
 }
-
-/*
- * For heaps, we prevent creation of the FSM unless the number of pages
- * exceeds HEAP_FSM_CREATION_THRESHOLD. For tables that don't already have
- * a FSM, this will save an inode and a few kB of space.
- *
- * XXX The API is a little awkward -- if the caller passes a valid nblocks
- * value, it can avoid invoking a system call. If the caller passes
- * InvalidBlockNumber and receives a false return value, it can get an
- * up-to-date relation size from get_nblocks. This saves a few cycles in
- * the caller, which would otherwise need to get the relation size by itself.
- */
-static bool
-fsm_allow_writes(Relation rel, BlockNumber heapblk,
-				 BlockNumber nblocks, BlockNumber *get_nblocks)
-{
-	bool		skip_get_nblocks;
-
-	if (heapblk >= HEAP_FSM_CREATION_THRESHOLD)
-		return true;
-
-	/* Non-heap rels can always create a FSM. */
-	if (rel->rd_rel->relkind != RELKIND_RELATION &&
-		rel->rd_rel->relkind != RELKIND_TOASTVALUE)
-		return true;
-
-	/*
-	 * If the caller knows nblocks, we can avoid a system call later.
-	 * If it doesn't, maybe we have relpages from a previous VACUUM.
-	 * Since the table may have extended since then, we still have to
-	 * count the pages later if we can't return now.
-	 */
-	if (nblocks != InvalidBlockNumber)
-	{
-		if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
-			return true;
-		else
-			skip_get_nblocks = true;
-	}
-	else
-	{
-		if (rel->rd_rel->relpages != InvalidBlockNumber &&
-			rel->rd_rel->relpages > HEAP_FSM_CREATION_THRESHOLD)
-			return true;
-		else
-			skip_get_nblocks = false;
-	}
-
-	RelationOpenSmgr(rel);
-	if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
-		return true;
-
-	if (skip_get_nblocks)
-		return false;
-
-	/* last resort */
-	*get_nblocks = RelationGetNumberOfBlocks(rel);
-	if (*get_nblocks > HEAP_FSM_CREATION_THRESHOLD)
-		return true;
-	else
-		return false;
-}
-
-/*
- * Initialize or update the local map of blocks to try, for when there is
- * no FSM.
- *
- * When we initialize the map, the whole heap is potentially available to
- * try. Testing revealed that trying every block can cause a small
- * performance dip compared to when we use a FSM, so we try every other
- * block instead.
- */
-static void
-fsm_local_set(Relation rel, BlockNumber cur_nblocks)
-{
-	BlockNumber blkno,
-				cached_target_block;
-
-	/* The local map must not be set already. */
-	Assert(!FSM_LOCAL_MAP_EXISTS);
-
-	/*
-	 * Starting at the current last block in the relation and working
-	 * backwards, mark alternating blocks as available.
-	 */
-	blkno = cur_nblocks - 1;
-	while (true)
-	{
-		fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
-		if (blkno >= 2)
-			blkno -= 2;
-		else
-			break;
-	}
-
-	/* Cache the number of blocks. */
-	fsm_local_map.nblocks = cur_nblocks;
-
-	/* Set the status of the cached target block to 'unavailable'. */
-	cached_target_block = RelationGetTargetBlock(rel);
-	if (cached_target_block != InvalidBlockNumber &&
-		cached_target_block < cur_nblocks)
-		fsm_local_map.map[cached_target_block] = FSM_LOCAL_NOT_AVAIL;
-}
-
-/*
- * Search the local map for an available block to try, in descending order.
- * As such, there is no heuristic available to decide which order will be
- * better to try, but the probability of having space in the last block in the
- * map is higher because that is the most recent block added to the heap.
- *
- * This function is used when there is no FSM.
- */
-static BlockNumber
-fsm_local_search(void)
-{
-	BlockNumber target_block;
-
-	/* Local map must be set by now. */
-	Assert(FSM_LOCAL_MAP_EXISTS);
-
-	target_block = fsm_local_map.nblocks;
-	do
-	{
-		target_block--;
-		if (fsm_local_map.map[target_block] == FSM_LOCAL_AVAIL)
-			return target_block;
-	} while (target_block > 0);
-
-	return InvalidBlockNumber;
-}
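
For reference, the searches above are driven by fsm_space_needed_to_cat() and fsm_space_avail_to_cat(), which are not part of the hunks shown here. The README's point that the FSM never records exact free space comes from their one-byte category encoding. The sketch below is a simplified, standalone approximation of that mapping; the helper names and constants are chosen to match the default 8 kB page size and 256 categories, and the real functions in freespace.c handle additional edge cases such as the maximum request size.

#include <stdio.h>

#define BLCKSZ			8192
#define FSM_CATEGORIES	256
#define FSM_CAT_STEP	(BLCKSZ / FSM_CATEGORIES)	/* 32 bytes per category */

/* Available space is rounded down, so a page never advertises more than it has. */
static unsigned int
space_avail_to_cat(unsigned int avail)
{
	unsigned int cat = avail / FSM_CAT_STEP;

	return cat > 255 ? 255 : cat;
}

/* Requested space is rounded up, so any page returned is guaranteed to fit it. */
static unsigned int
space_needed_to_cat(unsigned int needed)
{
	unsigned int cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;

	return cat > 255 ? 255 : cat;
}

int
main(void)
{
	/* A page with 100 bytes free is recorded as category 3 ... */
	printf("avail 100 bytes -> category %u\n", space_avail_to_cat(100));
	/* ... but a 100-byte request searches for category 4 or better. */
	printf("need 100 bytes  -> category %u\n", space_needed_to_cat(100));
	return 0;
}

The opposite rounding directions make the category comparison safe: a page's recorded category never overstates its free space, while a request's category never understates its demand.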
src/backend/storage/freespace/indexfsm.c

@@ -37,7 +37,7 @@
 BlockNumber
 GetFreeIndexPage(Relation rel)
 {
-	BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2, true);
+	BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2);
 
 	if (blkno != InvalidBlockNumber)
 		RecordUsedIndexPage(rel, blkno);
@@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel)
 void
 RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 {
-	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber);
+	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
 }
 
 
@@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 void
 RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
 {
-	RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber);
+	RecordPageWithFreeSpace(rel, usedBlock, 0);
 }
 
 /*
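
A note on the constants in the index wrappers above, using the same simplified category math as the earlier sketch (again illustrative, not code from this commit): the index FSM treats pages as wholly free or wholly used, which is why RecordFreeIndexPage() records BLCKSZ - 1, RecordUsedIndexPage() records 0, and GetFreeIndexPage() asks for half a page.

#include <stdio.h>

#define BLCKSZ		 8192
#define FSM_CAT_STEP (BLCKSZ / 256)

int
main(void)
{
	/* RecordFreeIndexPage(rel, blk, BLCKSZ - 1): a recycled page lands in the top category. */
	printf("recycled page  -> category %d\n", (BLCKSZ - 1) / FSM_CAT_STEP);

	/* RecordUsedIndexPage(rel, blk, 0): an in-use page drops to the bottom category. */
	printf("in-use page    -> category %d\n", 0 / FSM_CAT_STEP);

	/* GetFreeIndexPage(rel) asks for BLCKSZ / 2, which rounds up to category 128. */
	printf("half-page need -> category %d\n", (BLCKSZ / 2 + FSM_CAT_STEP - 1) / FSM_CAT_STEP);
	return 0;
}

In practice, only blocks previously handed to RecordFreeIndexPage() therefore come back from GetFreeIndexPage(); partially filled index pages never qualify.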