diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 75223c9beaf..37a1be4114e 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -317,10 +317,10 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) * BULKWRITE buffer selection strategy object to the buffer manager. * Passing NULL for bistate selects the default behavior. * - * We always try to avoid filling existing pages further than the fillfactor. - * This is OK since this routine is not consulted when updating a tuple and - * keeping it on the same page, which is the scenario fillfactor is meant - * to reserve space for. + * We don't fill existing pages further than the fillfactor, except for large + * tuples in nearly-empty pages. This is OK since this routine is not + * consulted when updating a tuple and keeping it on the same page, which is + * the scenario fillfactor is meant to reserve space for. * * ereport(ERROR) is allowed here, so this routine *must* be called * before any (unlogged) changes are made in buffer pool. @@ -334,8 +334,10 @@ RelationGetBufferForTuple(Relation relation, Size len, bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; - Size pageFreeSpace = 0, - saveFreeSpace = 0; + Size nearlyEmptyFreeSpace, + pageFreeSpace = 0, + saveFreeSpace = 0, + targetFreeSpace = 0; BlockNumber targetBlock, otherBlock; bool needLock; @@ -358,6 +360,19 @@ RelationGetBufferForTuple(Relation relation, Size len, saveFreeSpace = RelationGetTargetPageFreeSpace(relation, HEAP_DEFAULT_FILLFACTOR); + /* + * Since pages without tuples can still have line pointers, we consider + * pages "empty" when the unavailable space is slight. This threshold is + * somewhat arbitrary, but it should prevent most unnecessary relation + * extensions while inserting large tuples into low-fillfactor tables. + */ + nearlyEmptyFreeSpace = MaxHeapTupleSize - + (MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData)); + if (len + saveFreeSpace > nearlyEmptyFreeSpace) + targetFreeSpace = Max(len, nearlyEmptyFreeSpace); + else + targetFreeSpace = len + saveFreeSpace; + if (otherBuffer != InvalidBuffer) otherBlock = BufferGetBlockNumber(otherBuffer); else @@ -376,13 +391,7 @@ RelationGetBufferForTuple(Relation relation, Size len, * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ - if (len + saveFreeSpace > MaxHeapTupleSize) - { - /* can't fit, don't bother asking FSM */ - targetBlock = InvalidBlockNumber; - use_fsm = false; - } - else if (bistate && bistate->current_buf != InvalidBuffer) + if (bistate && bistate->current_buf != InvalidBuffer) targetBlock = BufferGetBlockNumber(bistate->current_buf); else targetBlock = RelationGetTargetBlock(relation); @@ -393,7 +402,7 @@ RelationGetBufferForTuple(Relation relation, Size len, * We have no cached target page, so ask the FSM for an initial * target. */ - targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace); + targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace); } /* @@ -517,7 +526,7 @@ loop: } pageFreeSpace = PageGetHeapFreeSpace(page); - if (len + saveFreeSpace <= pageFreeSpace) + if (targetFreeSpace <= pageFreeSpace) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); @@ -550,7 +559,7 @@ loop: targetBlock = RecordAndGetPageWithFreeSpace(relation, targetBlock, pageFreeSpace, - len + saveFreeSpace); + targetFreeSpace); } /* @@ -582,7 +591,7 @@ loop: * Check if some other backend has extended a block for us while * we were waiting on the lock. */ - targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace); + targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace); /* * If some other waiter has already extended the relation, we diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index cf13e6002bb..1aff62cd423 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -676,7 +676,11 @@ raw_heap_insert(RewriteState state, HeapTuple tup) if (len + saveFreeSpace > pageFreeSpace) { - /* Doesn't fit, so write out the existing page */ + /* + * Doesn't fit, so write out the existing page. It always + * contains a tuple. Hence, unlike RelationGetBufferForTuple(), + * enforce saveFreeSpace unconditionally. + */ /* XLOG stuff */ if (RelationNeedsWAL(state->rs_new_rel)) diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index da50ee3b670..5063a3dc221 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -82,6 +82,27 @@ select col1, col2, char_length(col3) from inserttest; drop table inserttest; -- +-- tuple larger than fillfactor +-- +CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10); +ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain; +-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize) +INSERT INTO large_tuple_test (select 1, NULL); +-- should still fit on the page +INSERT INTO large_tuple_test (select 2, repeat('a', 1000)); +SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); + pg_size_pretty +---------------- + 8192 bytes +(1 row) + +-- add small record to the second page +INSERT INTO large_tuple_test (select 3, NULL); +-- now this tuple won't fit on the second page, but the insert should +-- still succeed by extending the relation +INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +DROP TABLE large_tuple_test; +-- -- check indirection (field/array assignment), cf bug #14265 -- -- these tests are aware that transformInsertStmt has 3 separate code paths diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index 963faa1614c..bfaa8a3b277 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -37,6 +37,28 @@ select col1, col2, char_length(col3) from inserttest; drop table inserttest; +-- +-- tuple larger than fillfactor +-- +CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10); +ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain; + +-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize) +INSERT INTO large_tuple_test (select 1, NULL); + +-- should still fit on the page +INSERT INTO large_tuple_test (select 2, repeat('a', 1000)); +SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); + +-- add small record to the second page +INSERT INTO large_tuple_test (select 3, NULL); + +-- now this tuple won't fit on the second page, but the insert should +-- still succeed by extending the relation +INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); + +DROP TABLE large_tuple_test; + -- -- check indirection (field/array assignment), cf bug #14265 --