mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Accept slightly-filled pages for tuples larger than fillfactor.
We always inserted a larger-than-fillfactor tuple into a newly-extended page, even when existing pages were empty or contained nothing but an unused line pointer. This extended the relation unnecessarily. Start tolerating page usage up to 1/8 of the maximum space that could be taken up by line pointers. This threshold is somewhat arbitrary, but it should allow more cases to reuse pages. This has no effect on tables with fillfactor=100 (the default). John Naylor and Floris van Nee. Reviewed by Matthias van de Meent. Reported by Floris van Nee. Discussion: https://postgr.es/m/6e263217180649339720afe2176c50aa@opammb0562.comp.optiver.com
This commit is contained in:
parent
7ef64e7e72
commit
0ff8bbdee1
@ -317,10 +317,10 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
|||||||
* BULKWRITE buffer selection strategy object to the buffer manager.
|
* BULKWRITE buffer selection strategy object to the buffer manager.
|
||||||
* Passing NULL for bistate selects the default behavior.
|
* Passing NULL for bistate selects the default behavior.
|
||||||
*
|
*
|
||||||
* We always try to avoid filling existing pages further than the fillfactor.
|
* We don't fill existing pages further than the fillfactor, except for large
|
||||||
* This is OK since this routine is not consulted when updating a tuple and
|
* tuples in nearly-empty pages. This is OK since this routine is not
|
||||||
* keeping it on the same page, which is the scenario fillfactor is meant
|
* consulted when updating a tuple and keeping it on the same page, which is
|
||||||
* to reserve space for.
|
* the scenario fillfactor is meant to reserve space for.
|
||||||
*
|
*
|
||||||
* ereport(ERROR) is allowed here, so this routine *must* be called
|
* ereport(ERROR) is allowed here, so this routine *must* be called
|
||||||
* before any (unlogged) changes are made in buffer pool.
|
* before any (unlogged) changes are made in buffer pool.
|
||||||
@ -334,8 +334,10 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
|||||||
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
|
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
|
||||||
Buffer buffer = InvalidBuffer;
|
Buffer buffer = InvalidBuffer;
|
||||||
Page page;
|
Page page;
|
||||||
Size pageFreeSpace = 0,
|
Size nearlyEmptyFreeSpace,
|
||||||
saveFreeSpace = 0;
|
pageFreeSpace = 0,
|
||||||
|
saveFreeSpace = 0,
|
||||||
|
targetFreeSpace = 0;
|
||||||
BlockNumber targetBlock,
|
BlockNumber targetBlock,
|
||||||
otherBlock;
|
otherBlock;
|
||||||
bool needLock;
|
bool needLock;
|
||||||
@ -358,6 +360,19 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
|||||||
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
||||||
HEAP_DEFAULT_FILLFACTOR);
|
HEAP_DEFAULT_FILLFACTOR);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since pages without tuples can still have line pointers, we consider
|
||||||
|
* pages "empty" when the unavailable space is slight. This threshold is
|
||||||
|
* somewhat arbitrary, but it should prevent most unnecessary relation
|
||||||
|
* extensions while inserting large tuples into low-fillfactor tables.
|
||||||
|
*/
|
||||||
|
nearlyEmptyFreeSpace = MaxHeapTupleSize -
|
||||||
|
(MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
|
||||||
|
if (len + saveFreeSpace > nearlyEmptyFreeSpace)
|
||||||
|
targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
|
||||||
|
else
|
||||||
|
targetFreeSpace = len + saveFreeSpace;
|
||||||
|
|
||||||
if (otherBuffer != InvalidBuffer)
|
if (otherBuffer != InvalidBuffer)
|
||||||
otherBlock = BufferGetBlockNumber(otherBuffer);
|
otherBlock = BufferGetBlockNumber(otherBuffer);
|
||||||
else
|
else
|
||||||
@ -376,13 +391,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
|||||||
* When use_fsm is false, we either put the tuple onto the existing target
|
* When use_fsm is false, we either put the tuple onto the existing target
|
||||||
* page or extend the relation.
|
* page or extend the relation.
|
||||||
*/
|
*/
|
||||||
if (len + saveFreeSpace > MaxHeapTupleSize)
|
if (bistate && bistate->current_buf != InvalidBuffer)
|
||||||
{
|
|
||||||
/* can't fit, don't bother asking FSM */
|
|
||||||
targetBlock = InvalidBlockNumber;
|
|
||||||
use_fsm = false;
|
|
||||||
}
|
|
||||||
else if (bistate && bistate->current_buf != InvalidBuffer)
|
|
||||||
targetBlock = BufferGetBlockNumber(bistate->current_buf);
|
targetBlock = BufferGetBlockNumber(bistate->current_buf);
|
||||||
else
|
else
|
||||||
targetBlock = RelationGetTargetBlock(relation);
|
targetBlock = RelationGetTargetBlock(relation);
|
||||||
@ -393,7 +402,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
|||||||
* We have no cached target page, so ask the FSM for an initial
|
* We have no cached target page, so ask the FSM for an initial
|
||||||
* target.
|
* target.
|
||||||
*/
|
*/
|
||||||
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
|
targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -517,7 +526,7 @@ loop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
pageFreeSpace = PageGetHeapFreeSpace(page);
|
pageFreeSpace = PageGetHeapFreeSpace(page);
|
||||||
if (len + saveFreeSpace <= pageFreeSpace)
|
if (targetFreeSpace <= pageFreeSpace)
|
||||||
{
|
{
|
||||||
/* use this page as future insert target, too */
|
/* use this page as future insert target, too */
|
||||||
RelationSetTargetBlock(relation, targetBlock);
|
RelationSetTargetBlock(relation, targetBlock);
|
||||||
@ -550,7 +559,7 @@ loop:
|
|||||||
targetBlock = RecordAndGetPageWithFreeSpace(relation,
|
targetBlock = RecordAndGetPageWithFreeSpace(relation,
|
||||||
targetBlock,
|
targetBlock,
|
||||||
pageFreeSpace,
|
pageFreeSpace,
|
||||||
len + saveFreeSpace);
|
targetFreeSpace);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -582,7 +591,7 @@ loop:
|
|||||||
* Check if some other backend has extended a block for us while
|
* Check if some other backend has extended a block for us while
|
||||||
* we were waiting on the lock.
|
* we were waiting on the lock.
|
||||||
*/
|
*/
|
||||||
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
|
targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If some other waiter has already extended the relation, we
|
* If some other waiter has already extended the relation, we
|
||||||
|
@ -676,7 +676,11 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
|
|||||||
|
|
||||||
if (len + saveFreeSpace > pageFreeSpace)
|
if (len + saveFreeSpace > pageFreeSpace)
|
||||||
{
|
{
|
||||||
/* Doesn't fit, so write out the existing page */
|
/*
|
||||||
|
* Doesn't fit, so write out the existing page. It always
|
||||||
|
* contains a tuple. Hence, unlike RelationGetBufferForTuple(),
|
||||||
|
* enforce saveFreeSpace unconditionally.
|
||||||
|
*/
|
||||||
|
|
||||||
/* XLOG stuff */
|
/* XLOG stuff */
|
||||||
if (RelationNeedsWAL(state->rs_new_rel))
|
if (RelationNeedsWAL(state->rs_new_rel))
|
||||||
|
@ -82,6 +82,27 @@ select col1, col2, char_length(col3) from inserttest;
|
|||||||
|
|
||||||
drop table inserttest;
|
drop table inserttest;
|
||||||
--
|
--
|
||||||
|
-- tuple larger than fillfactor
|
||||||
|
--
|
||||||
|
CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
|
||||||
|
ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
|
||||||
|
-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
|
||||||
|
INSERT INTO large_tuple_test (select 1, NULL);
|
||||||
|
-- should still fit on the page
|
||||||
|
INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
|
||||||
|
SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
|
||||||
|
pg_size_pretty
|
||||||
|
----------------
|
||||||
|
8192 bytes
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- add small record to the second page
|
||||||
|
INSERT INTO large_tuple_test (select 3, NULL);
|
||||||
|
-- now this tuple won't fit on the second page, but the insert should
|
||||||
|
-- still succeed by extending the relation
|
||||||
|
INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
|
||||||
|
DROP TABLE large_tuple_test;
|
||||||
|
--
|
||||||
-- check indirection (field/array assignment), cf bug #14265
|
-- check indirection (field/array assignment), cf bug #14265
|
||||||
--
|
--
|
||||||
-- these tests are aware that transformInsertStmt has 3 separate code paths
|
-- these tests are aware that transformInsertStmt has 3 separate code paths
|
||||||
|
@ -37,6 +37,28 @@ select col1, col2, char_length(col3) from inserttest;
|
|||||||
|
|
||||||
drop table inserttest;
|
drop table inserttest;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- tuple larger than fillfactor
|
||||||
|
--
|
||||||
|
CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
|
||||||
|
ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
|
||||||
|
|
||||||
|
-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
|
||||||
|
INSERT INTO large_tuple_test (select 1, NULL);
|
||||||
|
|
||||||
|
-- should still fit on the page
|
||||||
|
INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
|
||||||
|
SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
|
||||||
|
|
||||||
|
-- add small record to the second page
|
||||||
|
INSERT INTO large_tuple_test (select 3, NULL);
|
||||||
|
|
||||||
|
-- now this tuple won't fit on the second page, but the insert should
|
||||||
|
-- still succeed by extending the relation
|
||||||
|
INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
|
||||||
|
|
||||||
|
DROP TABLE large_tuple_test;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- check indirection (field/array assignment), cf bug #14265
|
-- check indirection (field/array assignment), cf bug #14265
|
||||||
--
|
--
|
||||||
|
Loading…
x
Reference in New Issue
Block a user