mirror of
https://github.com/postgres/postgres.git
synced 2025-06-05 23:56:58 +03:00
Revert "Avoid creation of the free space map for small heap relations."
This reverts commit ac88d2962a96a9c7e83d5acfc28fe49a72812086.
This commit is contained in:
parent
ac88d2962a
commit
a23676503b
@ -1,69 +1,48 @@
|
|||||||
CREATE EXTENSION pageinspect;
|
CREATE EXTENSION pageinspect;
|
||||||
CREATE TABLE test_rel_forks (a int);
|
CREATE TABLE test1 (a int, b int);
|
||||||
-- Make sure there are enough blocks in the heap for the FSM to be created.
|
INSERT INTO test1 VALUES (16777217, 131584);
|
||||||
INSERT INTO test_rel_forks SELECT i from generate_series(1,1000) i;
|
VACUUM test1; -- set up FSM
|
||||||
-- set up FSM and VM
|
|
||||||
VACUUM test_rel_forks;
|
|
||||||
-- The page contents can vary, so just test that it can be read
|
-- The page contents can vary, so just test that it can be read
|
||||||
-- successfully, but don't keep the output.
|
-- successfully, but don't keep the output.
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
|
SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
|
||||||
main_0
|
main_0
|
||||||
--------
|
--------
|
||||||
8192
|
8192
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
|
SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
|
||||||
ERROR: block number 100 is out of range for relation "test_rel_forks"
|
ERROR: block number 1 is out of range for relation "test1"
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
|
SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
|
||||||
fsm_0
|
fsm_0
|
||||||
-------
|
-------
|
||||||
8192
|
8192
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
|
SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
|
||||||
ERROR: block number 10 is out of range for relation "test_rel_forks"
|
fsm_1
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
|
-------
|
||||||
|
8192
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
|
||||||
vm_0
|
vm_0
|
||||||
------
|
------
|
||||||
8192
|
8192
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
|
SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
|
||||||
ERROR: block number 1 is out of range for relation "test_rel_forks"
|
ERROR: block number 1 is out of range for relation "test1"
|
||||||
SELECT octet_length(get_raw_page('xxx', 'main', 0));
|
SELECT octet_length(get_raw_page('xxx', 'main', 0));
|
||||||
ERROR: relation "xxx" does not exist
|
ERROR: relation "xxx" does not exist
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
|
SELECT octet_length(get_raw_page('test1', 'xxx', 0));
|
||||||
ERROR: invalid fork name
|
ERROR: invalid fork name
|
||||||
HINT: Valid fork names are "main", "fsm", "vm", and "init".
|
HINT: Valid fork names are "main", "fsm", "vm", and "init".
|
||||||
SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
|
SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
|
||||||
fsm_page_contents
|
|
||||||
-------------------
|
|
||||||
0: 147 +
|
|
||||||
1: 147 +
|
|
||||||
3: 147 +
|
|
||||||
7: 147 +
|
|
||||||
15: 147 +
|
|
||||||
31: 147 +
|
|
||||||
63: 147 +
|
|
||||||
127: 147 +
|
|
||||||
255: 147 +
|
|
||||||
511: 147 +
|
|
||||||
1023: 147 +
|
|
||||||
2047: 147 +
|
|
||||||
4095: 147 +
|
|
||||||
fp_next_slot: 0 +
|
|
||||||
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
|
|
||||||
?column?
|
?column?
|
||||||
----------
|
----------
|
||||||
t
|
t
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
DROP TABLE test_rel_forks;
|
|
||||||
CREATE TABLE test1 (a int, b int);
|
|
||||||
INSERT INTO test1 VALUES (16777217, 131584);
|
|
||||||
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
|
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
|
||||||
pagesize | version
|
pagesize | version
|
||||||
----------+---------
|
----------+---------
|
||||||
@ -83,6 +62,26 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
|
|||||||
{"\\x01000001","\\x00020200"}
|
{"\\x01000001","\\x00020200"}
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
|
||||||
|
fsm_page_contents
|
||||||
|
-------------------
|
||||||
|
0: 254 +
|
||||||
|
1: 254 +
|
||||||
|
3: 254 +
|
||||||
|
7: 254 +
|
||||||
|
15: 254 +
|
||||||
|
31: 254 +
|
||||||
|
63: 254 +
|
||||||
|
127: 254 +
|
||||||
|
255: 254 +
|
||||||
|
511: 254 +
|
||||||
|
1023: 254 +
|
||||||
|
2047: 254 +
|
||||||
|
4095: 254 +
|
||||||
|
fp_next_slot: 0 +
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
DROP TABLE test1;
|
DROP TABLE test1;
|
||||||
-- check that using any of these functions with a partitioned table or index
|
-- check that using any of these functions with a partitioned table or index
|
||||||
-- would fail
|
-- would fail
|
||||||
|
@ -1,35 +1,26 @@
|
|||||||
CREATE EXTENSION pageinspect;
|
CREATE EXTENSION pageinspect;
|
||||||
|
|
||||||
CREATE TABLE test_rel_forks (a int);
|
CREATE TABLE test1 (a int, b int);
|
||||||
-- Make sure there are enough blocks in the heap for the FSM to be created.
|
INSERT INTO test1 VALUES (16777217, 131584);
|
||||||
INSERT INTO test_rel_forks SELECT i from generate_series(1,1000) i;
|
|
||||||
|
|
||||||
-- set up FSM and VM
|
VACUUM test1; -- set up FSM
|
||||||
VACUUM test_rel_forks;
|
|
||||||
|
|
||||||
-- The page contents can vary, so just test that it can be read
|
-- The page contents can vary, so just test that it can be read
|
||||||
-- successfully, but don't keep the output.
|
-- successfully, but don't keep the output.
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
|
SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
|
SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
|
SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
|
SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
|
SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
|
SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
|
||||||
|
|
||||||
SELECT octet_length(get_raw_page('xxx', 'main', 0));
|
SELECT octet_length(get_raw_page('xxx', 'main', 0));
|
||||||
SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
|
SELECT octet_length(get_raw_page('test1', 'xxx', 0));
|
||||||
|
|
||||||
SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
|
SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
|
||||||
|
|
||||||
SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
|
|
||||||
|
|
||||||
DROP TABLE test_rel_forks;
|
|
||||||
|
|
||||||
CREATE TABLE test1 (a int, b int);
|
|
||||||
INSERT INTO test1 VALUES (16777217, 131584);
|
|
||||||
|
|
||||||
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
|
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
|
||||||
|
|
||||||
@ -38,6 +29,8 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_
|
|||||||
SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
|
SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
|
||||||
FROM heap_page_items(get_raw_page('test1', 0));
|
FROM heap_page_items(get_raw_page('test1', 0));
|
||||||
|
|
||||||
|
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
|
||||||
|
|
||||||
DROP TABLE test1;
|
DROP TABLE test1;
|
||||||
|
|
||||||
-- check that using any of these functions with a partitioned table or index
|
-- check that using any of these functions with a partitioned table or index
|
||||||
|
@ -590,13 +590,12 @@ tuple would otherwise be too big.
|
|||||||
<indexterm><primary>FSM</primary><see>Free Space Map</see></indexterm>
|
<indexterm><primary>FSM</primary><see>Free Space Map</see></indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Each heap relation, unless it is very small, and each index relation, except
|
Each heap and index relation, except for hash indexes, has a Free Space Map
|
||||||
for hash indexes, has a Free Space Map (FSM) to keep track of available
|
(FSM) to keep track of available space in the relation. It's stored
|
||||||
space in the relation. It's stored alongside the main relation data in a
|
alongside the main relation data in a separate relation fork, named after the
|
||||||
separate relation fork, named after the filenode number of the relation, plus
|
filenode number of the relation, plus a <literal>_fsm</literal> suffix. For example,
|
||||||
a <literal>_fsm</literal> suffix. For example, if the filenode of a relation
|
if the filenode of a relation is 12345, the FSM is stored in a file called
|
||||||
is 12345, the FSM is stored in a file called <filename>12345_fsm</filename>,
|
<filename>12345_fsm</filename>, in the same directory as the main relation file.
|
||||||
in the same directory as the main relation file.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
@ -1150,7 +1150,7 @@ terminate_brin_buildstate(BrinBuildState *state)
|
|||||||
freespace = PageGetFreeSpace(page);
|
freespace = PageGetFreeSpace(page);
|
||||||
blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
|
blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
|
||||||
ReleaseBuffer(state->bs_currentInsertBuf);
|
ReleaseBuffer(state->bs_currentInsertBuf);
|
||||||
RecordPageWithFreeSpace(state->bs_irel, blk, freespace, InvalidBlockNumber);
|
RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
|
||||||
FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
|
FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
|
|||||||
|
|
||||||
if (extended)
|
if (extended)
|
||||||
{
|
{
|
||||||
RecordPageWithFreeSpace(idxrel, newblk, freespace, InvalidBlockNumber);
|
RecordPageWithFreeSpace(idxrel, newblk, freespace);
|
||||||
FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
|
FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
|
|||||||
|
|
||||||
if (extended)
|
if (extended)
|
||||||
{
|
{
|
||||||
RecordPageWithFreeSpace(idxrel, blk, freespace, InvalidBlockNumber);
|
RecordPageWithFreeSpace(idxrel, blk, freespace);
|
||||||
FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
|
FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
|
|||||||
|
|
||||||
/* Measure free space and record it */
|
/* Measure free space and record it */
|
||||||
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
|
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
|
||||||
br_page_get_freespace(page), InvalidBlockNumber);
|
br_page_get_freespace(page));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -703,7 +703,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
|
|||||||
/* Choose initial target page, re-using existing target if known */
|
/* Choose initial target page, re-using existing target if known */
|
||||||
newblk = RelationGetTargetBlock(irel);
|
newblk = RelationGetTargetBlock(irel);
|
||||||
if (newblk == InvalidBlockNumber)
|
if (newblk == InvalidBlockNumber)
|
||||||
newblk = GetPageWithFreeSpace(irel, itemsz, true);
|
newblk = GetPageWithFreeSpace(irel, itemsz);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Loop until we find a page with sufficient free space. By the time we
|
* Loop until we find a page with sufficient free space. By the time we
|
||||||
@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
|
|||||||
* pages whose FSM records were forgotten in a crash.
|
* pages whose FSM records were forgotten in a crash.
|
||||||
*/
|
*/
|
||||||
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
|
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
|
||||||
br_page_get_freespace(page), InvalidBlockNumber);
|
br_page_get_freespace(page));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -239,14 +239,8 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
|||||||
* Immediately update the bottom level of the FSM. This has a good
|
* Immediately update the bottom level of the FSM. This has a good
|
||||||
* chance of making this page visible to other concurrently inserting
|
* chance of making this page visible to other concurrently inserting
|
||||||
* backends, and we want that to happen without delay.
|
* backends, and we want that to happen without delay.
|
||||||
*
|
|
||||||
* Since we know the table will end up with extraBlocks additional
|
|
||||||
* pages, we pass the final number to avoid possible unnecessary
|
|
||||||
* system calls and to make sure the FSM is created when we add the
|
|
||||||
* first new page.
|
|
||||||
*/
|
*/
|
||||||
RecordPageWithFreeSpace(relation, blockNum, freespace,
|
RecordPageWithFreeSpace(relation, blockNum, freespace);
|
||||||
firstBlock + extraBlocks);
|
|
||||||
}
|
}
|
||||||
while (--extraBlocks > 0);
|
while (--extraBlocks > 0);
|
||||||
|
|
||||||
@ -383,9 +377,20 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
|||||||
* We have no cached target page, so ask the FSM for an initial
|
* We have no cached target page, so ask the FSM for an initial
|
||||||
* target.
|
* target.
|
||||||
*/
|
*/
|
||||||
targetBlock = GetPageWithFreeSpace(relation,
|
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
|
||||||
len + saveFreeSpace,
|
|
||||||
false);
|
/*
|
||||||
|
* If the FSM knows nothing of the rel, try the last page before we
|
||||||
|
* give up and extend. This avoids one-tuple-per-page syndrome during
|
||||||
|
* bootstrapping or in a recently-started system.
|
||||||
|
*/
|
||||||
|
if (targetBlock == InvalidBlockNumber)
|
||||||
|
{
|
||||||
|
BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
|
||||||
|
|
||||||
|
if (nblocks > 0)
|
||||||
|
targetBlock = nblocks - 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
@ -479,14 +484,6 @@ loop:
|
|||||||
{
|
{
|
||||||
/* use this page as future insert target, too */
|
/* use this page as future insert target, too */
|
||||||
RelationSetTargetBlock(relation, targetBlock);
|
RelationSetTargetBlock(relation, targetBlock);
|
||||||
|
|
||||||
/*
|
|
||||||
* In case we used an in-memory map of available blocks, reset it
|
|
||||||
* for next use.
|
|
||||||
*/
|
|
||||||
if (targetBlock < HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
FSMClearLocalMap();
|
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -546,12 +543,9 @@ loop:
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if some other backend has extended a block for us while
|
* Check if some other backend has extended a block for us while
|
||||||
* we were waiting on the lock. We only check the FSM -- if there
|
* we were waiting on the lock.
|
||||||
* isn't one we don't recheck the number of blocks.
|
|
||||||
*/
|
*/
|
||||||
targetBlock = GetPageWithFreeSpace(relation,
|
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
|
||||||
len + saveFreeSpace,
|
|
||||||
true);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If some other waiter has already extended the relation, we
|
* If some other waiter has already extended the relation, we
|
||||||
@ -631,12 +625,5 @@ loop:
|
|||||||
*/
|
*/
|
||||||
RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
|
RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
|
||||||
|
|
||||||
/*
|
|
||||||
* In case we used an in-memory map of available blocks, reset it for next
|
|
||||||
* use. We do this unconditionally since after relation extension we
|
|
||||||
* can't skip this based on the targetBlock.
|
|
||||||
*/
|
|
||||||
FSMClearLocalMap();
|
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
@ -153,7 +153,7 @@ static BufferAccessStrategy vac_strategy;
|
|||||||
static void lazy_scan_heap(Relation onerel, int options,
|
static void lazy_scan_heap(Relation onerel, int options,
|
||||||
LVRelStats *vacrelstats, Relation *Irel, int nindexes,
|
LVRelStats *vacrelstats, Relation *Irel, int nindexes,
|
||||||
bool aggressive);
|
bool aggressive);
|
||||||
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks);
|
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
|
||||||
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
|
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
|
||||||
static void lazy_vacuum_index(Relation indrel,
|
static void lazy_vacuum_index(Relation indrel,
|
||||||
IndexBulkDeleteResult **stats,
|
IndexBulkDeleteResult **stats,
|
||||||
@ -758,7 +758,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
|
pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
|
||||||
|
|
||||||
/* Remove tuples from heap */
|
/* Remove tuples from heap */
|
||||||
lazy_vacuum_heap(onerel, vacrelstats, nblocks);
|
lazy_vacuum_heap(onerel, vacrelstats);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Forget the now-vacuumed tuples, and press on, but be careful
|
* Forget the now-vacuumed tuples, and press on, but be careful
|
||||||
@ -896,7 +896,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
MarkBufferDirty(buf);
|
MarkBufferDirty(buf);
|
||||||
UnlockReleaseBuffer(buf);
|
UnlockReleaseBuffer(buf);
|
||||||
|
|
||||||
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
|
RecordPageWithFreeSpace(onerel, blkno, freespace);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -935,7 +935,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
}
|
}
|
||||||
|
|
||||||
UnlockReleaseBuffer(buf);
|
UnlockReleaseBuffer(buf);
|
||||||
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
|
RecordPageWithFreeSpace(onerel, blkno, freespace);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1332,7 +1332,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
* taken if there are no indexes.)
|
* taken if there are no indexes.)
|
||||||
*/
|
*/
|
||||||
if (vacrelstats->num_dead_tuples == prev_dead_count)
|
if (vacrelstats->num_dead_tuples == prev_dead_count)
|
||||||
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
|
RecordPageWithFreeSpace(onerel, blkno, freespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* report that everything is scanned and vacuumed */
|
/* report that everything is scanned and vacuumed */
|
||||||
@ -1394,7 +1394,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
/* Remove tuples from heap */
|
/* Remove tuples from heap */
|
||||||
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
|
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
|
||||||
PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
|
PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
|
||||||
lazy_vacuum_heap(onerel, vacrelstats, nblocks);
|
lazy_vacuum_heap(onerel, vacrelstats);
|
||||||
vacrelstats->num_index_scans++;
|
vacrelstats->num_index_scans++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1465,10 +1465,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
|
|||||||
* Note: the reason for doing this as a second pass is we cannot remove
|
* Note: the reason for doing this as a second pass is we cannot remove
|
||||||
* the tuples until we've removed their index entries, and we want to
|
* the tuples until we've removed their index entries, and we want to
|
||||||
* process index entry removal in batches as large as possible.
|
* process index entry removal in batches as large as possible.
|
||||||
* Note: nblocks is passed as an optimization for RecordPageWithFreeSpace().
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
|
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||||
{
|
{
|
||||||
int tupindex;
|
int tupindex;
|
||||||
int npages;
|
int npages;
|
||||||
@ -1505,7 +1504,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
|
|||||||
freespace = PageGetHeapFreeSpace(page);
|
freespace = PageGetHeapFreeSpace(page);
|
||||||
|
|
||||||
UnlockReleaseBuffer(buf);
|
UnlockReleaseBuffer(buf);
|
||||||
RecordPageWithFreeSpace(onerel, tblk, freespace, nblocks);
|
RecordPageWithFreeSpace(onerel, tblk, freespace);
|
||||||
npages++;
|
npages++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,6 @@
|
|||||||
#include "replication/walsender.h"
|
#include "replication/walsender.h"
|
||||||
#include "storage/condition_variable.h"
|
#include "storage/condition_variable.h"
|
||||||
#include "storage/fd.h"
|
#include "storage/fd.h"
|
||||||
#include "storage/freespace.h"
|
|
||||||
#include "storage/lmgr.h"
|
#include "storage/lmgr.h"
|
||||||
#include "storage/predicate.h"
|
#include "storage/predicate.h"
|
||||||
#include "storage/proc.h"
|
#include "storage/proc.h"
|
||||||
@ -2494,12 +2493,6 @@ AbortTransaction(void)
|
|||||||
pgstat_report_wait_end();
|
pgstat_report_wait_end();
|
||||||
pgstat_progress_end_command();
|
pgstat_progress_end_command();
|
||||||
|
|
||||||
/*
|
|
||||||
* In case we aborted during RelationGetBufferForTuple(), clear the local
|
|
||||||
* map of heap pages.
|
|
||||||
*/
|
|
||||||
FSMClearLocalMap();
|
|
||||||
|
|
||||||
/* Clean up buffer I/O and buffer context locks, too */
|
/* Clean up buffer I/O and buffer context locks, too */
|
||||||
AbortBufferIO();
|
AbortBufferIO();
|
||||||
UnlockBuffers();
|
UnlockBuffers();
|
||||||
@ -4721,13 +4714,6 @@ AbortSubTransaction(void)
|
|||||||
|
|
||||||
pgstat_report_wait_end();
|
pgstat_report_wait_end();
|
||||||
pgstat_progress_end_command();
|
pgstat_progress_end_command();
|
||||||
|
|
||||||
/*
|
|
||||||
* In case we aborted during RelationGetBufferForTuple(), clear the local
|
|
||||||
* map of heap pages.
|
|
||||||
*/
|
|
||||||
FSMClearLocalMap();
|
|
||||||
|
|
||||||
AbortBufferIO();
|
AbortBufferIO();
|
||||||
UnlockBuffers();
|
UnlockBuffers();
|
||||||
|
|
||||||
|
@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page
|
|||||||
exists and the relation must be extended by one page. As of PostgreSQL 8.4
|
exists and the relation must be extended by one page. As of PostgreSQL 8.4
|
||||||
each relation has its own, extensible free space map stored in a separate
|
each relation has its own, extensible free space map stored in a separate
|
||||||
"fork" of its relation. This eliminates the disadvantages of the former
|
"fork" of its relation. This eliminates the disadvantages of the former
|
||||||
fixed-size FSM. There are two exceptions:
|
fixed-size FSM.
|
||||||
|
|
||||||
1. Hash indexes never have a FSM.
|
|
||||||
2. For very small tables, a 3-page relation fork would be relatively large
|
|
||||||
and wasteful, so to save space we refrain from creating the FSM if the
|
|
||||||
heap has HEAP_FSM_CREATION_THRESHOLD pages or fewer.
|
|
||||||
|
|
||||||
To locate free space in the latter case, we simply try pages directly without
|
|
||||||
knowing ahead of time how much free space they have. To maintain good
|
|
||||||
performance, we create a local in-memory map of pages to try, and only mark
|
|
||||||
every other page as available. For example, in a 3-page heap, the local map
|
|
||||||
would look like:
|
|
||||||
|
|
||||||
ANAN
|
|
||||||
0123
|
|
||||||
|
|
||||||
Pages 0 and 2 are marked "available", and page 1 as "not available".
|
|
||||||
Page 3 is beyond the end of the relation, so is likewise marked "not
|
|
||||||
available". First we try page 2, and if that doesn't have sufficient free
|
|
||||||
space we try page 0 before giving up and extending the relation. There may
|
|
||||||
be some wasted free space on block 1, but if the relation extends to 4 pages:
|
|
||||||
|
|
||||||
NANA
|
|
||||||
0123
|
|
||||||
|
|
||||||
We not only have the new page 3 at our disposal, we can now check page 1
|
|
||||||
for free space as well.
|
|
||||||
|
|
||||||
Once the FSM is created for a heap we don't remove it even if somebody deletes
|
|
||||||
all the rows from the corresponding relation. We don't think it is a useful
|
|
||||||
optimization as it is quite likely that the relation will again grow to the same
|
|
||||||
size.
|
|
||||||
|
|
||||||
FSM data structure
|
|
||||||
------------------
|
|
||||||
|
|
||||||
It is important to keep the map small so that it can be searched rapidly.
|
It is important to keep the map small so that it can be searched rapidly.
|
||||||
Therefore, we don't attempt to record the exact free space on a page.
|
Therefore, we don't attempt to record the exact free space on a page.
|
||||||
@ -226,3 +192,5 @@ TODO
|
|||||||
----
|
----
|
||||||
|
|
||||||
- fastroot to avoid traversing upper nodes with just 1 child
|
- fastroot to avoid traversing upper nodes with just 1 child
|
||||||
|
- use a different system for tables that fit into one FSM page, with a
|
||||||
|
mechanism to switch to the real thing as the table grows.
|
||||||
|
@ -76,14 +76,6 @@
|
|||||||
#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
|
#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
|
||||||
#define FSM_BOTTOM_LEVEL 0
|
#define FSM_BOTTOM_LEVEL 0
|
||||||
|
|
||||||
/* Status codes for the local map. */
|
|
||||||
|
|
||||||
/* Either already tried, or beyond the end of the relation */
|
|
||||||
#define FSM_LOCAL_NOT_AVAIL 0x00
|
|
||||||
|
|
||||||
/* Available to try */
|
|
||||||
#define FSM_LOCAL_AVAIL 0x01
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The internal FSM routines work on a logical addressing scheme. Each
|
* The internal FSM routines work on a logical addressing scheme. Each
|
||||||
* level of the tree can be thought of as a separately addressable file.
|
* level of the tree can be thought of as a separately addressable file.
|
||||||
@ -97,17 +89,6 @@ typedef struct
|
|||||||
/* Address of the root page. */
|
/* Address of the root page. */
|
||||||
static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
|
static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
|
||||||
|
|
||||||
/* Local map of block numbers for small heaps with no FSM. */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
BlockNumber nblocks;
|
|
||||||
uint8 map[HEAP_FSM_CREATION_THRESHOLD];
|
|
||||||
} FSMLocalMap;
|
|
||||||
|
|
||||||
static FSMLocalMap fsm_local_map = {0, {FSM_LOCAL_NOT_AVAIL}};
|
|
||||||
|
|
||||||
#define FSM_LOCAL_MAP_EXISTS (fsm_local_map.nblocks > 0)
|
|
||||||
|
|
||||||
/* functions to navigate the tree */
|
/* functions to navigate the tree */
|
||||||
static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
|
static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
|
||||||
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
|
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
|
||||||
@ -126,14 +107,10 @@ static Size fsm_space_cat_to_avail(uint8 cat);
|
|||||||
/* workhorse functions for various operations */
|
/* workhorse functions for various operations */
|
||||||
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
|
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
|
||||||
uint8 newValue, uint8 minValue);
|
uint8 newValue, uint8 minValue);
|
||||||
static void fsm_local_set(Relation rel, BlockNumber cur_nblocks);
|
|
||||||
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
|
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
|
||||||
static BlockNumber fsm_local_search(void);
|
|
||||||
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
|
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
|
||||||
BlockNumber start, BlockNumber end,
|
BlockNumber start, BlockNumber end,
|
||||||
bool *eof);
|
bool *eof);
|
||||||
static bool fsm_allow_writes(Relation rel, BlockNumber heapblk,
|
|
||||||
BlockNumber nblocks, BlockNumber *get_nblocks);
|
|
||||||
|
|
||||||
|
|
||||||
/******** Public API ********/
|
/******** Public API ********/
|
||||||
@ -150,46 +127,13 @@ static bool fsm_allow_writes(Relation rel, BlockNumber heapblk,
|
|||||||
* amount of free space available on that page and then try again (see
|
* amount of free space available on that page and then try again (see
|
||||||
* RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
|
* RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
|
||||||
* extend the relation.
|
* extend the relation.
|
||||||
*
|
|
||||||
* For very small heap relations that don't have a FSM, we try every other
|
|
||||||
* page before extending the relation. To keep track of which pages have
|
|
||||||
* been tried, initialize a local in-memory map of pages.
|
|
||||||
*/
|
*/
|
||||||
BlockNumber
|
BlockNumber
|
||||||
GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only)
|
GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
|
||||||
{
|
{
|
||||||
uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
|
uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
|
||||||
BlockNumber target_block,
|
|
||||||
nblocks;
|
|
||||||
|
|
||||||
/* First try the FSM, if it exists. */
|
return fsm_search(rel, min_cat);
|
||||||
target_block = fsm_search(rel, min_cat);
|
|
||||||
|
|
||||||
if (target_block == InvalidBlockNumber &&
|
|
||||||
(rel->rd_rel->relkind == RELKIND_RELATION ||
|
|
||||||
rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
|
|
||||||
!check_fsm_only)
|
|
||||||
{
|
|
||||||
nblocks = RelationGetNumberOfBlocks(rel);
|
|
||||||
|
|
||||||
if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* If the FSM knows nothing of the rel, try the last page before
|
|
||||||
* we give up and extend. This avoids one-tuple-per-page syndrome
|
|
||||||
* during bootstrapping or in a recently-started system.
|
|
||||||
*/
|
|
||||||
target_block = nblocks - 1;
|
|
||||||
}
|
|
||||||
else if (nblocks > 0)
|
|
||||||
{
|
|
||||||
/* Create or update local map and get first candidate block. */
|
|
||||||
fsm_local_set(rel, nblocks);
|
|
||||||
target_block = fsm_local_search();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return target_block;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -200,47 +144,16 @@ GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only)
|
|||||||
* also some effort to return a page close to the old page; if there's a
|
* also some effort to return a page close to the old page; if there's a
|
||||||
* page with enough free space on the same FSM page where the old one page
|
* page with enough free space on the same FSM page where the old one page
|
||||||
* is located, it is preferred.
|
* is located, it is preferred.
|
||||||
*
|
|
||||||
* For very small heap relations that don't have a FSM, we update the local
|
|
||||||
* map to indicate we have tried a page, and return the next page to try.
|
|
||||||
*/
|
*/
|
||||||
BlockNumber
|
BlockNumber
|
||||||
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
|
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
|
||||||
Size oldSpaceAvail, Size spaceNeeded)
|
Size oldSpaceAvail, Size spaceNeeded)
|
||||||
{
|
{
|
||||||
int old_cat;
|
int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
|
||||||
int search_cat;
|
int search_cat = fsm_space_needed_to_cat(spaceNeeded);
|
||||||
FSMAddress addr;
|
FSMAddress addr;
|
||||||
uint16 slot;
|
uint16 slot;
|
||||||
int search_slot;
|
int search_slot;
|
||||||
BlockNumber nblocks = InvalidBlockNumber;
|
|
||||||
|
|
||||||
/* First try the local map, if it exists. */
|
|
||||||
if (FSM_LOCAL_MAP_EXISTS)
|
|
||||||
{
|
|
||||||
Assert((rel->rd_rel->relkind == RELKIND_RELATION ||
|
|
||||||
rel->rd_rel->relkind == RELKIND_TOASTVALUE) &&
|
|
||||||
fsm_local_map.map[oldPage] == FSM_LOCAL_AVAIL);
|
|
||||||
|
|
||||||
fsm_local_map.map[oldPage] = FSM_LOCAL_NOT_AVAIL;
|
|
||||||
return fsm_local_search();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fsm_allow_writes(rel, oldPage, InvalidBlockNumber, &nblocks))
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* If we have neither a local map nor a FSM, we probably just
|
|
||||||
* tried the target block in the smgr relation entry and failed,
|
|
||||||
* so we'll need to create the local map.
|
|
||||||
*/
|
|
||||||
fsm_local_set(rel, nblocks);
|
|
||||||
return fsm_local_search();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Normal FSM logic follows */
|
|
||||||
|
|
||||||
old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
|
|
||||||
search_cat = fsm_space_needed_to_cat(spaceNeeded);
|
|
||||||
|
|
||||||
/* Get the location of the FSM byte representing the heap block */
|
/* Get the location of the FSM byte representing the heap block */
|
||||||
addr = fsm_get_location(oldPage, &slot);
|
addr = fsm_get_location(oldPage, &slot);
|
||||||
@ -263,41 +176,20 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
|
|||||||
* Note that if the new spaceAvail value is higher than the old value stored
|
* Note that if the new spaceAvail value is higher than the old value stored
|
||||||
* in the FSM, the space might not become visible to searchers until the next
|
* in the FSM, the space might not become visible to searchers until the next
|
||||||
* FreeSpaceMapVacuum call, which updates the upper level pages.
|
* FreeSpaceMapVacuum call, which updates the upper level pages.
|
||||||
*
|
|
||||||
* Callers have no need for a local map.
|
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
|
RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
|
||||||
Size spaceAvail, BlockNumber nblocks)
|
|
||||||
{
|
{
|
||||||
int new_cat;
|
int new_cat = fsm_space_avail_to_cat(spaceAvail);
|
||||||
FSMAddress addr;
|
FSMAddress addr;
|
||||||
uint16 slot;
|
uint16 slot;
|
||||||
BlockNumber dummy;
|
|
||||||
|
|
||||||
if (!fsm_allow_writes(rel, heapBlk, nblocks, &dummy))
|
|
||||||
/* No FSM to update and no local map either */
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Get the location of the FSM byte representing the heap block */
|
/* Get the location of the FSM byte representing the heap block */
|
||||||
addr = fsm_get_location(heapBlk, &slot);
|
addr = fsm_get_location(heapBlk, &slot);
|
||||||
|
|
||||||
new_cat = fsm_space_avail_to_cat(spaceAvail);
|
|
||||||
fsm_set_and_search(rel, addr, slot, new_cat, 0);
|
fsm_set_and_search(rel, addr, slot, new_cat, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Clear the local map. We must call this when we have found a block with
|
|
||||||
* enough free space, when we extend the relation, or on transaction abort.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
FSMClearLocalMap(void)
|
|
||||||
{
|
|
||||||
fsm_local_map.nblocks = 0;
|
|
||||||
memset(&fsm_local_map.map, FSM_LOCAL_NOT_AVAIL,
|
|
||||||
sizeof(fsm_local_map.map));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
|
* XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
|
||||||
* WAL replay
|
* WAL replay
|
||||||
@ -312,30 +204,6 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
|
|||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
Buffer buf;
|
Buffer buf;
|
||||||
Page page;
|
Page page;
|
||||||
bool write_to_fsm;
|
|
||||||
|
|
||||||
/* This is meant to mirror the logic in fsm_allow_writes() */
|
|
||||||
if (heapBlk >= HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
write_to_fsm = true;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Open the relation at smgr level */
|
|
||||||
SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
|
|
||||||
|
|
||||||
if (smgrexists(smgr, FSM_FORKNUM))
|
|
||||||
write_to_fsm = true;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
BlockNumber heap_nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
|
|
||||||
if (heap_nblocks > HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
write_to_fsm = true;
|
|
||||||
else
|
|
||||||
write_to_fsm = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!write_to_fsm)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Get the location of the FSM byte representing the heap block */
|
/* Get the location of the FSM byte representing the heap block */
|
||||||
addr = fsm_get_location(heapBlk, &slot);
|
addr = fsm_get_location(heapBlk, &slot);
|
||||||
@ -1036,134 +904,3 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
|
|||||||
|
|
||||||
return max_avail;
|
return max_avail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* For heaps, we prevent creation of the FSM unless the number of pages
|
|
||||||
* exceeds HEAP_FSM_CREATION_THRESHOLD. For tables that don't already have
|
|
||||||
* a FSM, this will save an inode and a few kB of space.
|
|
||||||
*
|
|
||||||
* XXX The API is a little awkward -- if the caller passes a valid nblocks
|
|
||||||
* value, it can avoid invoking a system call. If the caller passes
|
|
||||||
* InvalidBlockNumber and receives a false return value, it can get an
|
|
||||||
* up-to-date relation size from get_nblocks. This saves a few cycles in
|
|
||||||
* the caller, which would otherwise need to get the relation size by itself.
|
|
||||||
*/
|
|
||||||
static bool
|
|
||||||
fsm_allow_writes(Relation rel, BlockNumber heapblk,
|
|
||||||
BlockNumber nblocks, BlockNumber *get_nblocks)
|
|
||||||
{
|
|
||||||
bool skip_get_nblocks;
|
|
||||||
|
|
||||||
if (heapblk >= HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* Non-heap rels can always create a FSM. */
|
|
||||||
if (rel->rd_rel->relkind != RELKIND_RELATION &&
|
|
||||||
rel->rd_rel->relkind != RELKIND_TOASTVALUE)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the caller knows nblocks, we can avoid a system call later.
|
|
||||||
* If it doesn't, maybe we have relpages from a previous VACUUM.
|
|
||||||
* Since the table may have extended since then, we still have to
|
|
||||||
* count the pages later if we can't return now.
|
|
||||||
*/
|
|
||||||
if (nblocks != InvalidBlockNumber)
|
|
||||||
{
|
|
||||||
if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
skip_get_nblocks = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (rel->rd_rel->relpages != InvalidBlockNumber &&
|
|
||||||
rel->rd_rel->relpages > HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
skip_get_nblocks = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
RelationOpenSmgr(rel);
|
|
||||||
if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (skip_get_nblocks)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* last resort */
|
|
||||||
*get_nblocks = RelationGetNumberOfBlocks(rel);
|
|
||||||
if (*get_nblocks > HEAP_FSM_CREATION_THRESHOLD)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize or update the local map of blocks to try, for when there is
|
|
||||||
* no FSM.
|
|
||||||
*
|
|
||||||
* When we initialize the map, the whole heap is potentially available to
|
|
||||||
* try. Testing revealed that trying every block can cause a small
|
|
||||||
* performance dip compared to when we use a FSM, so we try every other
|
|
||||||
* block instead.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
fsm_local_set(Relation rel, BlockNumber cur_nblocks)
|
|
||||||
{
|
|
||||||
BlockNumber blkno,
|
|
||||||
cached_target_block;
|
|
||||||
|
|
||||||
/* The local map must not be set already. */
|
|
||||||
Assert(!FSM_LOCAL_MAP_EXISTS);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Starting at the current last block in the relation and working
|
|
||||||
* backwards, mark alternating blocks as available.
|
|
||||||
*/
|
|
||||||
blkno = cur_nblocks - 1;
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
|
|
||||||
if (blkno >= 2)
|
|
||||||
blkno -= 2;
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Cache the number of blocks. */
|
|
||||||
fsm_local_map.nblocks = cur_nblocks;
|
|
||||||
|
|
||||||
/* Set the status of the cached target block to 'unavailable'. */
|
|
||||||
cached_target_block = RelationGetTargetBlock(rel);
|
|
||||||
if (cached_target_block != InvalidBlockNumber &&
|
|
||||||
cached_target_block < cur_nblocks)
|
|
||||||
fsm_local_map.map[cached_target_block] = FSM_LOCAL_NOT_AVAIL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Search the local map for an available block to try, in descending order.
|
|
||||||
* As such, there is no heuristic available to decide which order will be
|
|
||||||
* better to try, but the probability of having space in the last block in the
|
|
||||||
* map is higher because that is the most recent block added to the heap.
|
|
||||||
*
|
|
||||||
* This function is used when there is no FSM.
|
|
||||||
*/
|
|
||||||
static BlockNumber
|
|
||||||
fsm_local_search(void)
|
|
||||||
{
|
|
||||||
BlockNumber target_block;
|
|
||||||
|
|
||||||
/* Local map must be set by now. */
|
|
||||||
Assert(FSM_LOCAL_MAP_EXISTS);
|
|
||||||
|
|
||||||
target_block = fsm_local_map.nblocks;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
target_block--;
|
|
||||||
if (fsm_local_map.map[target_block] == FSM_LOCAL_AVAIL)
|
|
||||||
return target_block;
|
|
||||||
} while (target_block > 0);
|
|
||||||
|
|
||||||
return InvalidBlockNumber;
|
|
||||||
}
|
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
BlockNumber
|
BlockNumber
|
||||||
GetFreeIndexPage(Relation rel)
|
GetFreeIndexPage(Relation rel)
|
||||||
{
|
{
|
||||||
BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2, true);
|
BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2);
|
||||||
|
|
||||||
if (blkno != InvalidBlockNumber)
|
if (blkno != InvalidBlockNumber)
|
||||||
RecordUsedIndexPage(rel, blkno);
|
RecordUsedIndexPage(rel, blkno);
|
||||||
@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel)
|
|||||||
void
|
void
|
||||||
RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
|
RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
|
||||||
{
|
{
|
||||||
RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber);
|
RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
|
|||||||
void
|
void
|
||||||
RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
|
RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
|
||||||
{
|
{
|
||||||
RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber);
|
RecordPageWithFreeSpace(rel, usedBlock, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -18,20 +18,15 @@
|
|||||||
#include "storage/relfilenode.h"
|
#include "storage/relfilenode.h"
|
||||||
#include "utils/relcache.h"
|
#include "utils/relcache.h"
|
||||||
|
|
||||||
/* Only create the FSM if the heap has greater than this many blocks */
|
|
||||||
#define HEAP_FSM_CREATION_THRESHOLD 4
|
|
||||||
|
|
||||||
/* prototypes for public functions in freespace.c */
|
/* prototypes for public functions in freespace.c */
|
||||||
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
|
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
|
||||||
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded,
|
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
|
||||||
bool check_fsm_only);
|
|
||||||
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
|
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
|
||||||
BlockNumber oldPage,
|
BlockNumber oldPage,
|
||||||
Size oldSpaceAvail,
|
Size oldSpaceAvail,
|
||||||
Size spaceNeeded);
|
Size spaceNeeded);
|
||||||
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
|
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
|
||||||
Size spaceAvail, BlockNumber nblocks);
|
Size spaceAvail);
|
||||||
extern void FSMClearLocalMap(void);
|
|
||||||
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
|
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
|
||||||
Size spaceAvail);
|
Size spaceAvail);
|
||||||
|
|
||||||
|
@ -1,75 +0,0 @@
|
|||||||
--
|
|
||||||
-- Free Space Map test
|
|
||||||
--
|
|
||||||
CREATE TABLE fsm_check_size (num int, str text);
|
|
||||||
-- Fill 3 blocks with as many large records as will fit
|
|
||||||
-- No FSM
|
|
||||||
INSERT INTO fsm_check_size SELECT i, rpad('', 1024, 'a')
|
|
||||||
FROM generate_series(1,7*3) i;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
|
|
||||||
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
heap_size | fsm_size
|
|
||||||
-----------+----------
|
|
||||||
24576 | 0
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Clear some space on block 0
|
|
||||||
DELETE FROM fsm_check_size WHERE num <= 5;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
-- Insert small record in block 2 to set the cached smgr targetBlock
|
|
||||||
INSERT INTO fsm_check_size VALUES(99, 'b');
|
|
||||||
-- Insert large record and make sure it goes in block 0 rather than
|
|
||||||
-- causing the relation to extend
|
|
||||||
INSERT INTO fsm_check_size VALUES (101, rpad('', 1024, 'a'));
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
|
|
||||||
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
heap_size | fsm_size
|
|
||||||
-----------+----------
|
|
||||||
24576 | 0
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Extend table with enough blocks to exceed the FSM threshold
|
|
||||||
-- FSM is created and extended to 3 blocks
|
|
||||||
INSERT INTO fsm_check_size SELECT i, 'c' FROM generate_series(200,1200) i;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
fsm_size
|
|
||||||
----------
|
|
||||||
24576
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Truncate heap to 1 block
|
|
||||||
-- No change in FSM
|
|
||||||
DELETE FROM fsm_check_size WHERE num > 7;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
fsm_size
|
|
||||||
----------
|
|
||||||
24576
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Truncate heap to 0 blocks
|
|
||||||
-- FSM now truncated to 2 blocks
|
|
||||||
DELETE FROM fsm_check_size;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
fsm_size
|
|
||||||
----------
|
|
||||||
16384
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Add long random string to extend TOAST table to 1 block
|
|
||||||
INSERT INTO fsm_check_size
|
|
||||||
VALUES(0, (SELECT string_agg(md5(chr(i)), '')
|
|
||||||
FROM generate_series(1,100) i));
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size(reltoastrelid, 'main') AS toast_size,
|
|
||||||
pg_relation_size(reltoastrelid, 'fsm') AS toast_fsm_size
|
|
||||||
FROM pg_class WHERE relname = 'fsm_check_size';
|
|
||||||
toast_size | toast_fsm_size
|
|
||||||
------------+----------------
|
|
||||||
8192 | 0
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
DROP TABLE fsm_check_size;
|
|
@ -68,12 +68,6 @@ test: create_aggregate create_function_3 create_cast constraints triggers inheri
|
|||||||
# ----------
|
# ----------
|
||||||
test: sanity_check
|
test: sanity_check
|
||||||
|
|
||||||
# ----------
|
|
||||||
# fsm does a delete followed by vacuum, and running it in parallel can prevent
|
|
||||||
# removal of rows.
|
|
||||||
# ----------
|
|
||||||
test: fsm
|
|
||||||
|
|
||||||
# ----------
|
# ----------
|
||||||
# Believe it or not, select creates a table, subsequent
|
# Believe it or not, select creates a table, subsequent
|
||||||
# tests need.
|
# tests need.
|
||||||
|
@ -80,7 +80,6 @@ test: roleattributes
|
|||||||
test: create_am
|
test: create_am
|
||||||
test: hash_func
|
test: hash_func
|
||||||
test: sanity_check
|
test: sanity_check
|
||||||
test: fsm
|
|
||||||
test: errors
|
test: errors
|
||||||
test: select
|
test: select
|
||||||
test: select_into
|
test: select_into
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
--
|
|
||||||
-- Free Space Map test
|
|
||||||
--
|
|
||||||
|
|
||||||
CREATE TABLE fsm_check_size (num int, str text);
|
|
||||||
|
|
||||||
-- Fill 3 blocks with as many large records as will fit
|
|
||||||
-- No FSM
|
|
||||||
INSERT INTO fsm_check_size SELECT i, rpad('', 1024, 'a')
|
|
||||||
FROM generate_series(1,7*3) i;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
|
|
||||||
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
|
|
||||||
-- Clear some space on block 0
|
|
||||||
DELETE FROM fsm_check_size WHERE num <= 5;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
|
|
||||||
-- Insert small record in block 2 to set the cached smgr targetBlock
|
|
||||||
INSERT INTO fsm_check_size VALUES(99, 'b');
|
|
||||||
|
|
||||||
-- Insert large record and make sure it goes in block 0 rather than
|
|
||||||
-- causing the relation to extend
|
|
||||||
INSERT INTO fsm_check_size VALUES (101, rpad('', 1024, 'a'));
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
|
|
||||||
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
|
|
||||||
-- Extend table with enough blocks to exceed the FSM threshold
|
|
||||||
-- FSM is created and extended to 3 blocks
|
|
||||||
INSERT INTO fsm_check_size SELECT i, 'c' FROM generate_series(200,1200) i;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
|
|
||||||
-- Truncate heap to 1 block
|
|
||||||
-- No change in FSM
|
|
||||||
DELETE FROM fsm_check_size WHERE num > 7;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
|
|
||||||
-- Truncate heap to 0 blocks
|
|
||||||
-- FSM now truncated to 2 blocks
|
|
||||||
DELETE FROM fsm_check_size;
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
|
|
||||||
|
|
||||||
-- Add long random string to extend TOAST table to 1 block
|
|
||||||
INSERT INTO fsm_check_size
|
|
||||||
VALUES(0, (SELECT string_agg(md5(chr(i)), '')
|
|
||||||
FROM generate_series(1,100) i));
|
|
||||||
VACUUM fsm_check_size;
|
|
||||||
SELECT pg_relation_size(reltoastrelid, 'main') AS toast_size,
|
|
||||||
pg_relation_size(reltoastrelid, 'fsm') AS toast_fsm_size
|
|
||||||
FROM pg_class WHERE relname = 'fsm_check_size';
|
|
||||||
|
|
||||||
DROP TABLE fsm_check_size;
|
|
Loading…
x
Reference in New Issue
Block a user