1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-20 05:03:10 +03:00

Improve bulk-insert performance by keeping the current target buffer pinned

(but not locked, as that would risk deadlocks).  Also, make it work in a small
ring of buffers to avoid having bulk inserts trash the whole buffer arena.

Robert Haas, after an idea of Simon Riggs'.
This commit is contained in:
Tom Lane
2008-11-06 20:51:15 +00:00
parent cdc197cf31
commit 85e2cedf98
12 changed files with 203 additions and 84 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
}
/*
* GetBulkInsertState - prepare status object for a bulk insert
*/
BulkInsertState
GetBulkInsertState(void)
{
BulkInsertState bistate;
bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
bistate->current_buf = InvalidBuffer;
return bistate;
}
/*
* FreeBulkInsertState - clean up after finishing a bulk insert
*/
void
FreeBulkInsertState(BulkInsertState bistate)
{
if (bistate->current_buf != InvalidBuffer)
ReleaseBuffer(bistate->current_buf);
FreeAccessStrategy(bistate->strategy);
pfree(bistate);
}
/*
* heap_insert - insert tuple into a heap
*
* The new tuple is stamped with current transaction ID and the specified
* command ID.
*
* If use_wal is false, the new tuple is not logged in WAL, even for a
* non-temp relation. Safe usage of this behavior requires that we arrange
* that all new tuples go into new pages not containing any tuples from other
* transactions, and that the relation gets fsync'd before commit.
* (See also heap_sync() comments)
* If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
* logged in WAL, even for a non-temp relation. Safe usage of this behavior
* requires that we arrange that all new tuples go into new pages not
* containing any tuples from other transactions, and that the relation gets
* fsync'd before commit. (See also heap_sync() comments)
*
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
* more info.
* The HEAP_INSERT_SKIP_FSM option is passed directly to
* RelationGetBufferForTuple, which see for more info.
*
* Note that use_wal and use_fsm will be applied when inserting into the
* heap's TOAST table, too, if the tuple requires any out-of-line data.
* Note that these options will be applied when inserting into the heap's
* TOAST table, too, if the tuple requires any out-of-line data.
*
* The BulkInsertState object (if any; bistate can be NULL for default
* behavior) is also just passed through to RelationGetBufferForTuple.
*
* The return value is the OID assigned to the tuple (either here or by the
* caller), or InvalidOid if no OID. The header fields of *tup are updated
@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm)
int options, BulkInsertState bistate)
{
TransactionId xid = GetCurrentTransactionId();
HeapTuple heaptup;
@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
heaptup = tup;
}
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
heaptup = toast_insert_or_update(relation, tup, NULL,
use_wal, use_fsm);
heaptup = toast_insert_or_update(relation, tup, NULL, options);
else
heaptup = tup;
/* Find buffer to insert this tuple into */
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
InvalidBuffer, use_fsm);
InvalidBuffer, options, bistate);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
MarkBufferDirty(buffer);
/* XLOG stuff */
if (use_wal && !relation->rd_istemp)
if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp)
{
xl_heap_insert xlrec;
xl_heap_header xlhdr;
@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
}
/*
@ -2595,8 +2624,7 @@ l2:
if (need_toast)
{
/* Note we always use WAL and FSM during updates */
heaptup = toast_insert_or_update(relation, newtup, &oldtup,
true, true);
heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0);
newtupsize = MAXALIGN(heaptup->t_len);
}
else
@ -2623,7 +2651,7 @@ l2:
{
/* Assume there's no chance to put heaptup on same page. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
buffer, true);
buffer, 0, NULL);
}
else
{
@ -2640,7 +2668,7 @@ l2:
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
buffer, true);
buffer, 0, NULL);
}
else
{

View File

@ -8,13 +8,14 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
((HeapTupleHeader) item)->t_ctid = tuple->t_self;
}
/*
* Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
*/
static Buffer
ReadBufferBI(Relation relation, BlockNumber targetBlock,
BulkInsertState bistate)
{
Buffer buffer;
/* If not bulk-insert, exactly like ReadBuffer */
if (!bistate)
return ReadBuffer(relation, targetBlock);
/* If we have the desired block already pinned, re-pin and return it */
if (bistate->current_buf != InvalidBuffer)
{
if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
{
IncrBufferRefCount(bistate->current_buf);
return bistate->current_buf;
}
/* ... else drop the old buffer */
ReleaseBuffer(bistate->current_buf);
bistate->current_buf = InvalidBuffer;
}
/* Perform a read using the buffer strategy */
buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
RBM_NORMAL, bistate->strategy);
/* Save the selected block as target for future inserts */
IncrBufferRefCount(buffer);
bistate->current_buf = buffer;
return buffer;
}
/*
* RelationGetBufferForTuple
*
@ -80,13 +118,13 @@ RelationPutHeapTuple(Relation relation,
* happen if space is freed in that page after heap_update finds there's not
* enough there). In that case, the page will be pinned and locked only once.
*
* If use_fsm is true (the normal case), we use FSM to help us find free
* space. If use_fsm is false, we always append a new empty page to the
* end of the relation if the tuple won't fit on the current target page.
* We normally use FSM to help us find free space. However,
* if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
* the end of the relation if the tuple won't fit on the current target page.
* This can save some cycles when we know the relation is new and doesn't
* contain useful amounts of free space.
*
* The use_fsm = false case is also useful for non-WAL-logged additions to a
* HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
* relation, if the caller holds exclusive lock and is careful to invalidate
* relation->rd_targblock before the first insertion --- that ensures that
* all insertions will occur into newly added pages and not be intermixed
@ -94,6 +132,12 @@ RelationPutHeapTuple(Relation relation,
* any committed data of other transactions. (See heap_insert's comments
* for additional constraints needed for safe usage of this behavior.)
*
* The caller can also provide a BulkInsertState object to optimize many
* insertions into the same relation. This keeps a pin on the current
* insertion target page (to save pin/unpin cycles) and also passes a
* BULKWRITE buffer selection strategy object to the buffer manager.
* Passing NULL for bistate selects the default behavior.
*
* We always try to avoid filling existing pages further than the fillfactor.
* This is OK since this routine is not consulted when updating a tuple and
* keeping it on the same page, which is the scenario fillfactor is meant
@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
*/
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer, bool use_fsm)
Buffer otherBuffer, int options,
struct BulkInsertStateData *bistate)
{
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
Buffer buffer = InvalidBuffer;
Page page;
Size pageFreeSpace,
@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
len = MAXALIGN(len); /* be conservative */
/* Bulk insert is not supported for updates, only inserts. */
Assert(otherBuffer == InvalidBuffer || !bistate);
/*
* If we're gonna fail for oversize tuple, do it right away
*/
@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* We first try to put the tuple on the same page we last inserted a tuple
* on, as cached in the relcache entry. If that doesn't work, we ask the
* shared Free Space Map to locate a suitable page. Since the FSM's info
* might be out of date, we have to be prepared to loop around and retry
* multiple times. (To insure this isn't an infinite loop, we must update
* the FSM with the correct amount of free space on each page that proves
* not to be suitable.) If the FSM has no record of a page with enough
* free space, we give up and extend the relation.
* on, as cached in the BulkInsertState or relcache entry. If that
* doesn't work, we ask the Free Space Map to locate a suitable page.
* Since the FSM's info might be out of date, we have to be prepared to
* loop around and retry multiple times. (To insure this isn't an infinite
* loop, we must update the FSM with the correct amount of free space on
* each page that proves not to be suitable.) If the FSM has no record of
* a page with enough free space, we give up and extend the relation.
*
* When use_fsm is false, we either put the tuple onto the existing target
* page or extend the relation.
*/
if (len + saveFreeSpace <= MaxHeapTupleSize)
targetBlock = relation->rd_targblock;
else
if (len + saveFreeSpace > MaxHeapTupleSize)
{
/* can't fit, don't screw up FSM request tracking by trying */
/* can't fit, don't bother asking FSM */
targetBlock = InvalidBlockNumber;
use_fsm = false;
}
else if (bistate && bistate->current_buf != InvalidBuffer)
targetBlock = BufferGetBlockNumber(bistate->current_buf);
else
targetBlock = relation->rd_targblock;
if (targetBlock == InvalidBlockNumber && use_fsm)
{
@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
if (otherBuffer == InvalidBuffer)
{
/* easy case */
buffer = ReadBuffer(relation, targetBlock);
buffer = ReadBufferBI(relation, targetBlock, bistate);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
else if (otherBlock == targetBlock)
@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* it worth keeping an accurate file length in shared memory someplace,
* rather than relying on the kernel to do it for us?
*/
buffer = ReadBuffer(relation, P_NEW);
buffer = ReadBufferBI(relation, P_NEW, bistate);
/*
* We can be certain that locking the otherBuffer first is OK, since it

View File

@ -96,7 +96,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
}
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
state->rs_use_wal, false);
HEAP_INSERT_SKIP_FSM |
(state->rs_use_wal ?
0 : HEAP_INSERT_SKIP_WAL));
else
heaptup = tup;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.90 2008/11/02 01:45:27 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.91 2008/11/06 20:51:14 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -74,8 +74,7 @@ do { \
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
bool use_wal, bool use_fsm);
static Datum toast_save_datum(Relation rel, Datum value, int options);
static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);
@ -400,7 +399,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
* Inputs:
* newtup: the candidate new tuple to be inserted
* oldtup: the old row version for UPDATE, or NULL for INSERT
* use_wal, use_fsm: flags to be passed to heap_insert() for toast rows
* options: options to be passed to heap_insert() for toast rows
* Result:
* either newtup if no toasting is needed, or a palloc'd modified tuple
* that is what should actually get stored
@ -411,7 +410,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
*/
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
bool use_wal, bool use_fsm)
int options)
{
HeapTuple result_tuple;
TupleDesc tupleDesc;
@ -677,8 +676,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
{
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i],
use_wal, use_fsm);
toast_values[i] = toast_save_datum(rel, toast_values[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
toast_free[i] = true;
@ -728,8 +726,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i],
use_wal, use_fsm);
toast_values[i] = toast_save_datum(rel, toast_values[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
toast_free[i] = true;
@ -838,8 +835,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i],
use_wal, use_fsm);
toast_values[i] = toast_save_datum(rel, toast_values[i], options);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
toast_free[i] = true;
@ -1120,8 +1116,7 @@ toast_compress_datum(Datum value)
* ----------
*/
static Datum
toast_save_datum(Relation rel, Datum value,
bool use_wal, bool use_fsm)
toast_save_datum(Relation rel, Datum value, int options)
{
Relation toastrel;
Relation toastidx;
@ -1218,7 +1213,7 @@ toast_save_datum(Relation rel, Datum value,
memcpy(VARDATA(&chunk_data), data_p, chunk_size);
toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);
heap_insert(toastrel, toasttup, mycid, options, NULL);
/*
* Create the index entry. We cheat a little here by not using